Committed by
GitHub
Add Dart API for MatchaTTS models (#1687)
正在显示
10 个修改的文件
包含
349 行增加
和
24 行删除
| @@ -4,6 +4,31 @@ set -ex | @@ -4,6 +4,31 @@ set -ex | ||
| 4 | 4 | ||
| 5 | cd dart-api-examples | 5 | cd dart-api-examples |
| 6 | 6 | ||
| 7 | +pushd tts | ||
| 8 | + | ||
| 9 | +echo '----------matcha tts----------' | ||
| 10 | +./run-matcha-zh.sh | ||
| 11 | +./run-matcha-en.sh | ||
| 12 | +ls -lh *.wav | ||
| 13 | +rm -rf matcha-icefall-* | ||
| 14 | +rm *.onnx | ||
| 15 | + | ||
| 16 | +echo '----------piper tts----------' | ||
| 17 | +./run-piper.sh | ||
| 18 | +rm -rf vits-piper-* | ||
| 19 | + | ||
| 20 | +echo '----------coqui tts----------' | ||
| 21 | +./run-coqui.sh | ||
| 22 | +rm -rf vits-coqui-* | ||
| 23 | + | ||
| 24 | +echo '----------zh tts----------' | ||
| 25 | +./run-vits-zh.sh | ||
| 26 | +rm -rf sherpa-onnx-* | ||
| 27 | + | ||
| 28 | +ls -lh *.wav | ||
| 29 | + | ||
| 30 | +popd # tts | ||
| 31 | + | ||
| 7 | pushd speaker-diarization | 32 | pushd speaker-diarization |
| 8 | echo '----------speaker diarization----------' | 33 | echo '----------speaker diarization----------' |
| 9 | ./run.sh | 34 | ./run.sh |
| @@ -106,22 +131,6 @@ rm -rf sherpa-onnx-* | @@ -106,22 +131,6 @@ rm -rf sherpa-onnx-* | ||
| 106 | 131 | ||
| 107 | popd # non-streaming-asr | 132 | popd # non-streaming-asr |
| 108 | 133 | ||
| 109 | -pushd tts | ||
| 110 | - | ||
| 111 | -echo '----------piper tts----------' | ||
| 112 | -./run-piper.sh | ||
| 113 | -rm -rf vits-piper-* | ||
| 114 | - | ||
| 115 | -echo '----------coqui tts----------' | ||
| 116 | -./run-coqui.sh | ||
| 117 | -rm -rf vits-coqui-* | ||
| 118 | - | ||
| 119 | -echo '----------zh tts----------' | ||
| 120 | -./run-zh.sh | ||
| 121 | -rm -rf sherpa-onnx-* | ||
| 122 | - | ||
| 123 | -popd # tts | ||
| 124 | - | ||
| 125 | pushd streaming-asr | 134 | pushd streaming-asr |
| 126 | 135 | ||
| 127 | echo '----------streaming zipformer ctc HLG----------' | 136 | echo '----------streaming zipformer ctc HLG----------' |
dart-api-examples/tts/bin/matcha-en.dart
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | + | ||
| 7 | +import './init.dart'; | ||
| 8 | + | ||
| 9 | +void main(List<String> arguments) async { | ||
| 10 | + await initSherpaOnnx(); | ||
| 11 | + | ||
| 12 | + final parser = ArgParser() | ||
| 13 | + ..addOption('acoustic-model', help: 'Path to the acoustic model') | ||
| 14 | + ..addOption('vocoder', help: 'Path to the vocoder model') | ||
| 15 | + ..addOption('tokens', help: 'Path to tokens.txt') | ||
| 16 | + ..addOption( | ||
| 17 | + 'data-dir', | ||
| 18 | + help: 'Path to espeak-ng-data directory', | ||
| 19 | + defaultsTo: '', | ||
| 20 | + ) | ||
| 21 | + ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '') | ||
| 22 | + ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '') | ||
| 23 | + ..addOption('text', help: 'Text to generate TTS for') | ||
| 24 | + ..addOption('output-wav', help: 'Filename to save the generated audio') | ||
| 25 | + ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0') | ||
| 26 | + ..addOption( | ||
| 27 | + 'sid', | ||
| 28 | + help: 'Speaker ID to select. Used only for multi-speaker TTS', | ||
| 29 | + defaultsTo: '0', | ||
| 30 | + ); | ||
| 31 | + final res = parser.parse(arguments); | ||
| 32 | + if (res['acoustic-model'] == null || | ||
| 33 | + res['vocoder'] == null || | ||
| 34 | + res['tokens'] == null || | ||
| 35 | + res['data-dir'] == null || | ||
| 36 | + res['output-wav'] == null || | ||
| 37 | + res['text'] == null) { | ||
| 38 | + print(parser.usage); | ||
| 39 | + exit(1); | ||
| 40 | + } | ||
| 41 | + final acousticModel = res['acoustic-model'] as String; | ||
| 42 | + final vocoder = res['vocoder'] as String; | ||
| 43 | + final tokens = res['tokens'] as String; | ||
| 44 | + final dataDir = res['data-dir'] as String; | ||
| 45 | + final ruleFsts = res['rule-fsts'] as String; | ||
| 46 | + final ruleFars = res['rule-fars'] as String; | ||
| 47 | + final text = res['text'] as String; | ||
| 48 | + final outputWav = res['output-wav'] as String; | ||
| 49 | + var speed = double.tryParse(res['speed'] as String) ?? 1.0; | ||
| 50 | + final sid = int.tryParse(res['sid'] as String) ?? 0; | ||
| 51 | + | ||
| 52 | + if (speed == 0) { | ||
| 53 | + speed = 1.0; | ||
| 54 | + } | ||
| 55 | + | ||
| 56 | + final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig( | ||
| 57 | + acousticModel: acousticModel, | ||
| 58 | + vocoder: vocoder, | ||
| 59 | + tokens: tokens, | ||
| 60 | + dataDir: dataDir, | ||
| 61 | + lengthScale: 1 / speed, | ||
| 62 | + ); | ||
| 63 | + | ||
| 64 | + final modelConfig = sherpa_onnx.OfflineTtsModelConfig( | ||
| 65 | + matcha: matcha, | ||
| 66 | + numThreads: 1, | ||
| 67 | + debug: true, | ||
| 68 | + ); | ||
| 69 | + final config = sherpa_onnx.OfflineTtsConfig( | ||
| 70 | + model: modelConfig, | ||
| 71 | + maxNumSenetences: 1, | ||
| 72 | + ruleFsts: ruleFsts, | ||
| 73 | + ruleFars: ruleFars, | ||
| 74 | + ); | ||
| 75 | + | ||
| 76 | + final tts = sherpa_onnx.OfflineTts(config); | ||
| 77 | + final audio = tts.generate(text: text, sid: sid, speed: speed); | ||
| 78 | + tts.free(); | ||
| 79 | + | ||
| 80 | + sherpa_onnx.writeWave( | ||
| 81 | + filename: outputWav, | ||
| 82 | + samples: audio.samples, | ||
| 83 | + sampleRate: audio.sampleRate, | ||
| 84 | + ); | ||
| 85 | + print('Saved to $outputWav'); | ||
| 86 | +} |
dart-api-examples/tts/bin/matcha-zh.dart
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | + | ||
| 7 | +import './init.dart'; | ||
| 8 | + | ||
| 9 | +void main(List<String> arguments) async { | ||
| 10 | + await initSherpaOnnx(); | ||
| 11 | + | ||
| 12 | + final parser = ArgParser() | ||
| 13 | + ..addOption('acoustic-model', help: 'Path to the acoustic model') | ||
| 14 | + ..addOption('vocoder', help: 'Path to the vocoder model') | ||
| 15 | + ..addOption('tokens', help: 'Path to tokens.txt') | ||
| 16 | + ..addOption('lexicon', help: 'Path to lexicon.txt') | ||
| 17 | + ..addOption( | ||
| 18 | + 'dict-dir', | ||
| 19 | + help: 'Path to jieba dict directory', | ||
| 20 | + defaultsTo: '', | ||
| 21 | + ) | ||
| 22 | + ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '') | ||
| 23 | + ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '') | ||
| 24 | + ..addOption('text', help: 'Text to generate TTS for') | ||
| 25 | + ..addOption('output-wav', help: 'Filename to save the generated audio') | ||
| 26 | + ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0') | ||
| 27 | + ..addOption( | ||
| 28 | + 'sid', | ||
| 29 | + help: 'Speaker ID to select. Used only for multi-speaker TTS', | ||
| 30 | + defaultsTo: '0', | ||
| 31 | + ); | ||
| 32 | + final res = parser.parse(arguments); | ||
| 33 | + if (res['acoustic-model'] == null || | ||
| 34 | + res['vocoder'] == null || | ||
| 35 | + res['lexicon'] == null || | ||
| 36 | + res['tokens'] == null || | ||
| 37 | + res['dict-dir'] == null || | ||
| 38 | + res['output-wav'] == null || | ||
| 39 | + res['text'] == null) { | ||
| 40 | + print(parser.usage); | ||
| 41 | + exit(1); | ||
| 42 | + } | ||
| 43 | + final acousticModel = res['acoustic-model'] as String; | ||
| 44 | + final vocoder = res['vocoder'] as String; | ||
| 45 | + final lexicon = res['lexicon'] as String; | ||
| 46 | + final tokens = res['tokens'] as String; | ||
| 47 | + final dictDir = res['dict-dir'] as String; | ||
| 48 | + final ruleFsts = res['rule-fsts'] as String; | ||
| 49 | + final ruleFars = res['rule-fars'] as String; | ||
| 50 | + final text = res['text'] as String; | ||
| 51 | + final outputWav = res['output-wav'] as String; | ||
| 52 | + var speed = double.tryParse(res['speed'] as String) ?? 1.0; | ||
| 53 | + final sid = int.tryParse(res['sid'] as String) ?? 0; | ||
| 54 | + | ||
| 55 | + if (speed == 0) { | ||
| 56 | + speed = 1.0; | ||
| 57 | + } | ||
| 58 | + | ||
| 59 | + final matcha = sherpa_onnx.OfflineTtsMatchaModelConfig( | ||
| 60 | + acousticModel: acousticModel, | ||
| 61 | + vocoder: vocoder, | ||
| 62 | + lexicon: lexicon, | ||
| 63 | + tokens: tokens, | ||
| 64 | + dictDir: dictDir, | ||
| 65 | + lengthScale: 1 / speed, | ||
| 66 | + ); | ||
| 67 | + | ||
| 68 | + final modelConfig = sherpa_onnx.OfflineTtsModelConfig( | ||
| 69 | + matcha: matcha, | ||
| 70 | + numThreads: 1, | ||
| 71 | + debug: true, | ||
| 72 | + ); | ||
| 73 | + final config = sherpa_onnx.OfflineTtsConfig( | ||
| 74 | + model: modelConfig, | ||
| 75 | + maxNumSenetences: 1, | ||
| 76 | + ruleFsts: ruleFsts, | ||
| 77 | + ruleFars: ruleFars, | ||
| 78 | + ); | ||
| 79 | + | ||
| 80 | + final tts = sherpa_onnx.OfflineTts(config); | ||
| 81 | + final audio = tts.generate(text: text, sid: sid, speed: speed); | ||
| 82 | + tts.free(); | ||
| 83 | + | ||
| 84 | + sherpa_onnx.writeWave( | ||
| 85 | + filename: outputWav, | ||
| 86 | + samples: audio.samples, | ||
| 87 | + sampleRate: audio.sampleRate, | ||
| 88 | + ); | ||
| 89 | + print('Saved to $outputWav'); | ||
| 90 | +} |
dart-api-examples/tts/run-matcha-en.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +# please visit | ||
| 8 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker | ||
| 10 | +# to download more models | ||
| 11 | +if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 12 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 13 | + tar xf matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 14 | + rm matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 15 | +fi | ||
| 16 | + | ||
| 17 | +if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 18 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | ||
| 19 | +fi | ||
| 20 | + | ||
| 21 | +dart run \ | ||
| 22 | + ./bin/matcha-en.dart \ | ||
| 23 | + --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | ||
| 24 | + --vocoder ./hifigan_v2.onnx \ | ||
| 25 | + --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \ | ||
| 26 | + --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | ||
| 27 | + --sid 0 \ | ||
| 28 | + --speed 1.0 \ | ||
| 29 | + --output-wav matcha-en-1.wav \ | ||
| 30 | + --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." \ | ||
| 31 | + | ||
| 32 | +ls -lh *.wav |
dart-api-examples/tts/run-matcha-zh.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +# please visit | ||
| 8 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker | ||
| 9 | +# to download more models | ||
| 10 | +if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 11 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 | ||
| 12 | + tar xvf matcha-icefall-zh-baker.tar.bz2 | ||
| 13 | + rm matcha-icefall-zh-baker.tar.bz2 | ||
| 14 | +fi | ||
| 15 | + | ||
| 16 | +if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 17 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | ||
| 18 | +fi | ||
| 19 | + | ||
| 20 | +dart run \ | ||
| 21 | + ./bin/matcha-zh.dart \ | ||
| 22 | + --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ | ||
| 23 | + --vocoder ./hifigan_v2.onnx \ | ||
| 24 | + --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ | ||
| 25 | + --tokens ./matcha-icefall-zh-baker/tokens.txt \ | ||
| 26 | + --dict-dir ./matcha-icefall-zh-baker/dict \ | ||
| 27 | + --rule-fsts ./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ | ||
| 28 | + --sid 0 \ | ||
| 29 | + --speed 1.0 \ | ||
| 30 | + --output-wav matcha-zh-1.wav \ | ||
| 31 | + --text "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" \ | ||
| 32 | + | ||
| 33 | +dart run \ | ||
| 34 | + ./bin/matcha-zh.dart \ | ||
| 35 | + --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ | ||
| 36 | + --vocoder ./hifigan_v2.onnx \ | ||
| 37 | + --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ | ||
| 38 | + --tokens ./matcha-icefall-zh-baker/tokens.txt \ | ||
| 39 | + --dict-dir ./matcha-icefall-zh-baker/dict \ | ||
| 40 | + --sid 0 \ | ||
| 41 | + --speed 1.0 \ | ||
| 42 | + --output-wav matcha-zh-2.wav \ | ||
| 43 | + --text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔." \ | ||
| 44 | + | ||
| 45 | +ls -lh *.wav |
| @@ -16,7 +16,7 @@ if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then | @@ -16,7 +16,7 @@ if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then | ||
| 16 | fi | 16 | fi |
| 17 | 17 | ||
| 18 | dart run \ | 18 | dart run \ |
| 19 | - ./bin/zh.dart \ | 19 | + ./bin/vits-zh.dart \ |
| 20 | --model ./sherpa-onnx-vits-zh-ll/model.onnx \ | 20 | --model ./sherpa-onnx-vits-zh-ll/model.onnx \ |
| 21 | --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \ | 21 | --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \ |
| 22 | --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \ | 22 | --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \ |
| @@ -24,10 +24,10 @@ dart run \ | @@ -24,10 +24,10 @@ dart run \ | ||
| 24 | --sid 2 \ | 24 | --sid 2 \ |
| 25 | --speed 1.0 \ | 25 | --speed 1.0 \ |
| 26 | --text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \ | 26 | --text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \ |
| 27 | - --output-wav zh-jieba-2.wav | 27 | + --output-wav vits-zh-jieba-2.wav |
| 28 | 28 | ||
| 29 | dart run \ | 29 | dart run \ |
| 30 | - ./bin/zh.dart \ | 30 | + ./bin/vits-zh.dart \ |
| 31 | --model ./sherpa-onnx-vits-zh-ll/model.onnx \ | 31 | --model ./sherpa-onnx-vits-zh-ll/model.onnx \ |
| 32 | --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \ | 32 | --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \ |
| 33 | --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \ | 33 | --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \ |
| @@ -36,6 +36,6 @@ dart run \ | @@ -36,6 +36,6 @@ dart run \ | ||
| 36 | --sid 3 \ | 36 | --sid 3 \ |
| 37 | --speed 1.0 \ | 37 | --speed 1.0 \ |
| 38 | --text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \ | 38 | --text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \ |
| 39 | - --output-wav zh-jieba-3.wav | 39 | + --output-wav vits-zh-jieba-3.wav |
| 40 | 40 | ||
| 41 | ls -lh *.wav | 41 | ls -lh *.wav |
| @@ -131,6 +131,22 @@ final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct { | @@ -131,6 +131,22 @@ final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct { | ||
| 131 | external Pointer<Utf8> dictDir; | 131 | external Pointer<Utf8> dictDir; |
| 132 | } | 132 | } |
| 133 | 133 | ||
| 134 | +final class SherpaOnnxOfflineTtsMatchaModelConfig extends Struct { | ||
| 135 | + external Pointer<Utf8> acousticModel; | ||
| 136 | + external Pointer<Utf8> vocoder; | ||
| 137 | + external Pointer<Utf8> lexicon; | ||
| 138 | + external Pointer<Utf8> tokens; | ||
| 139 | + external Pointer<Utf8> dataDir; | ||
| 140 | + | ||
| 141 | + @Float() | ||
| 142 | + external double noiseScale; | ||
| 143 | + | ||
| 144 | + @Float() | ||
| 145 | + external double lengthScale; | ||
| 146 | + | ||
| 147 | + external Pointer<Utf8> dictDir; | ||
| 148 | +} | ||
| 149 | + | ||
| 134 | final class SherpaOnnxOfflineTtsModelConfig extends Struct { | 150 | final class SherpaOnnxOfflineTtsModelConfig extends Struct { |
| 135 | external SherpaOnnxOfflineTtsVitsModelConfig vits; | 151 | external SherpaOnnxOfflineTtsVitsModelConfig vits; |
| 136 | @Int32() | 152 | @Int32() |
| @@ -140,6 +156,7 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct { | @@ -140,6 +156,7 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct { | ||
| 140 | external int debug; | 156 | external int debug; |
| 141 | 157 | ||
| 142 | external Pointer<Utf8> provider; | 158 | external Pointer<Utf8> provider; |
| 159 | + external SherpaOnnxOfflineTtsMatchaModelConfig matcha; | ||
| 143 | } | 160 | } |
| 144 | 161 | ||
| 145 | final class SherpaOnnxOfflineTtsConfig extends Struct { | 162 | final class SherpaOnnxOfflineTtsConfig extends Struct { |
| @@ -8,9 +8,9 @@ import './sherpa_onnx_bindings.dart'; | @@ -8,9 +8,9 @@ import './sherpa_onnx_bindings.dart'; | ||
| 8 | 8 | ||
| 9 | class OfflineTtsVitsModelConfig { | 9 | class OfflineTtsVitsModelConfig { |
| 10 | const OfflineTtsVitsModelConfig({ | 10 | const OfflineTtsVitsModelConfig({ |
| 11 | - required this.model, | 11 | + this.model = '', |
| 12 | this.lexicon = '', | 12 | this.lexicon = '', |
| 13 | - required this.tokens, | 13 | + this.tokens = '', |
| 14 | this.dataDir = '', | 14 | this.dataDir = '', |
| 15 | this.noiseScale = 0.667, | 15 | this.noiseScale = 0.667, |
| 16 | this.noiseScaleW = 0.8, | 16 | this.noiseScaleW = 0.8, |
| @@ -33,9 +33,37 @@ class OfflineTtsVitsModelConfig { | @@ -33,9 +33,37 @@ class OfflineTtsVitsModelConfig { | ||
| 33 | final String dictDir; | 33 | final String dictDir; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | +class OfflineTtsMatchaModelConfig { | ||
| 37 | + const OfflineTtsMatchaModelConfig({ | ||
| 38 | + this.acousticModel = '', | ||
| 39 | + this.vocoder = '', | ||
| 40 | + this.lexicon = '', | ||
| 41 | + this.tokens = '', | ||
| 42 | + this.dataDir = '', | ||
| 43 | + this.noiseScale = 0.667, | ||
| 44 | + this.lengthScale = 1.0, | ||
| 45 | + this.dictDir = '', | ||
| 46 | + }); | ||
| 47 | + | ||
| 48 | + @override | ||
| 49 | + String toString() { | ||
| 50 | + return 'OfflineTtsMatchaModelConfig(acousticModel: $acousticModel, vocoder: $vocoder, lexicon: $lexicon, tokens: $tokens, dataDir: $dataDir, noiseScale: $noiseScale, lengthScale: $lengthScale, dictDir: $dictDir)'; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + final String acousticModel; | ||
| 54 | + final String vocoder; | ||
| 55 | + final String lexicon; | ||
| 56 | + final String tokens; | ||
| 57 | + final String dataDir; | ||
| 58 | + final double noiseScale; | ||
| 59 | + final double lengthScale; | ||
| 60 | + final String dictDir; | ||
| 61 | +} | ||
| 62 | + | ||
| 36 | class OfflineTtsModelConfig { | 63 | class OfflineTtsModelConfig { |
| 37 | const OfflineTtsModelConfig({ | 64 | const OfflineTtsModelConfig({ |
| 38 | - required this.vits, | 65 | + this.vits = const OfflineTtsVitsModelConfig(), |
| 66 | + this.matcha = const OfflineTtsMatchaModelConfig(), | ||
| 39 | this.numThreads = 1, | 67 | this.numThreads = 1, |
| 40 | this.debug = true, | 68 | this.debug = true, |
| 41 | this.provider = 'cpu', | 69 | this.provider = 'cpu', |
| @@ -43,10 +71,11 @@ class OfflineTtsModelConfig { | @@ -43,10 +71,11 @@ class OfflineTtsModelConfig { | ||
| 43 | 71 | ||
| 44 | @override | 72 | @override |
| 45 | String toString() { | 73 | String toString() { |
| 46 | - return 'OfflineTtsModelConfig(vits: $vits, numThreads: $numThreads, debug: $debug, provider: $provider)'; | 74 | + return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, numThreads: $numThreads, debug: $debug, provider: $provider)'; |
| 47 | } | 75 | } |
| 48 | 76 | ||
| 49 | final OfflineTtsVitsModelConfig vits; | 77 | final OfflineTtsVitsModelConfig vits; |
| 78 | + final OfflineTtsMatchaModelConfig matcha; | ||
| 50 | final int numThreads; | 79 | final int numThreads; |
| 51 | final bool debug; | 80 | final bool debug; |
| 52 | final String provider; | 81 | final String provider; |
| @@ -99,6 +128,16 @@ class OfflineTts { | @@ -99,6 +128,16 @@ class OfflineTts { | ||
| 99 | c.ref.model.vits.lengthScale = config.model.vits.lengthScale; | 128 | c.ref.model.vits.lengthScale = config.model.vits.lengthScale; |
| 100 | c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8(); | 129 | c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8(); |
| 101 | 130 | ||
| 131 | + c.ref.model.matcha.acousticModel = | ||
| 132 | + config.model.matcha.acousticModel.toNativeUtf8(); | ||
| 133 | + c.ref.model.matcha.vocoder = config.model.matcha.vocoder.toNativeUtf8(); | ||
| 134 | + c.ref.model.matcha.lexicon = config.model.matcha.lexicon.toNativeUtf8(); | ||
| 135 | + c.ref.model.matcha.tokens = config.model.matcha.tokens.toNativeUtf8(); | ||
| 136 | + c.ref.model.matcha.dataDir = config.model.matcha.dataDir.toNativeUtf8(); | ||
| 137 | + c.ref.model.matcha.noiseScale = config.model.matcha.noiseScale; | ||
| 138 | + c.ref.model.matcha.lengthScale = config.model.matcha.lengthScale; | ||
| 139 | + c.ref.model.matcha.dictDir = config.model.matcha.dictDir.toNativeUtf8(); | ||
| 140 | + | ||
| 102 | c.ref.model.numThreads = config.model.numThreads; | 141 | c.ref.model.numThreads = config.model.numThreads; |
| 103 | c.ref.model.debug = config.model.debug ? 1 : 0; | 142 | c.ref.model.debug = config.model.debug ? 1 : 0; |
| 104 | c.ref.model.provider = config.model.provider.toNativeUtf8(); | 143 | c.ref.model.provider = config.model.provider.toNativeUtf8(); |
| @@ -112,6 +151,12 @@ class OfflineTts { | @@ -112,6 +151,12 @@ class OfflineTts { | ||
| 112 | calloc.free(c.ref.ruleFars); | 151 | calloc.free(c.ref.ruleFars); |
| 113 | calloc.free(c.ref.ruleFsts); | 152 | calloc.free(c.ref.ruleFsts); |
| 114 | calloc.free(c.ref.model.provider); | 153 | calloc.free(c.ref.model.provider); |
| 154 | + calloc.free(c.ref.model.matcha.dictDir); | ||
| 155 | + calloc.free(c.ref.model.matcha.dataDir); | ||
| 156 | + calloc.free(c.ref.model.matcha.tokens); | ||
| 157 | + calloc.free(c.ref.model.matcha.lexicon); | ||
| 158 | + calloc.free(c.ref.model.matcha.vocoder); | ||
| 159 | + calloc.free(c.ref.model.matcha.acousticModel); | ||
| 115 | calloc.free(c.ref.model.vits.dictDir); | 160 | calloc.free(c.ref.model.vits.dictDir); |
| 116 | calloc.free(c.ref.model.vits.dataDir); | 161 | calloc.free(c.ref.model.vits.dataDir); |
| 117 | calloc.free(c.ref.model.vits.tokens); | 162 | calloc.free(c.ref.model.vits.tokens); |
-
请 注册 或 登录 后发表评论