正在显示 17 个修改的文件,包含 323 行增加和 28 行删除
| @@ -4,6 +4,34 @@ set -ex | @@ -4,6 +4,34 @@ set -ex | ||
| 4 | 4 | ||
| 5 | cd dart-api-examples | 5 | cd dart-api-examples |
| 6 | 6 | ||
| 7 | +pushd tts | ||
| 8 | + | ||
| 9 | +echo '----------matcha tts----------' | ||
| 10 | +./run-kitten-en.sh | ||
| 11 | +./run-kokoro-zh-en.sh | ||
| 12 | +./run-kokoro-en.sh | ||
| 13 | +./run-matcha-zh.sh | ||
| 14 | +./run-matcha-en.sh | ||
| 15 | +ls -lh *.wav | ||
| 16 | +rm -rf matcha-icefall-* | ||
| 17 | +rm *.onnx | ||
| 18 | + | ||
| 19 | +echo '----------piper tts----------' | ||
| 20 | +./run-piper.sh | ||
| 21 | +rm -rf vits-piper-* | ||
| 22 | + | ||
| 23 | +echo '----------coqui tts----------' | ||
| 24 | +./run-coqui.sh | ||
| 25 | +rm -rf vits-coqui-* | ||
| 26 | + | ||
| 27 | +echo '----------zh tts----------' | ||
| 28 | +./run-vits-zh.sh | ||
| 29 | +rm -rf sherpa-onnx-* | ||
| 30 | + | ||
| 31 | +ls -lh *.wav | ||
| 32 | + | ||
| 33 | +popd # tts | ||
| 34 | + | ||
| 7 | pushd vad | 35 | pushd vad |
| 8 | ./run-ten-vad.sh | 36 | ./run-ten-vad.sh |
| 9 | ./run.sh | 37 | ./run.sh |
| @@ -72,33 +100,6 @@ echo "speech enhancement with gtcrn models" | @@ -72,33 +100,6 @@ echo "speech enhancement with gtcrn models" | ||
| 72 | ls -lh | 100 | ls -lh |
| 73 | popd | 101 | popd |
| 74 | 102 | ||
| 75 | -pushd tts | ||
| 76 | - | ||
| 77 | -echo '----------matcha tts----------' | ||
| 78 | -./run-kokoro-zh-en.sh | ||
| 79 | -./run-kokoro-en.sh | ||
| 80 | -./run-matcha-zh.sh | ||
| 81 | -./run-matcha-en.sh | ||
| 82 | -ls -lh *.wav | ||
| 83 | -rm -rf matcha-icefall-* | ||
| 84 | -rm *.onnx | ||
| 85 | - | ||
| 86 | -echo '----------piper tts----------' | ||
| 87 | -./run-piper.sh | ||
| 88 | -rm -rf vits-piper-* | ||
| 89 | - | ||
| 90 | -echo '----------coqui tts----------' | ||
| 91 | -./run-coqui.sh | ||
| 92 | -rm -rf vits-coqui-* | ||
| 93 | - | ||
| 94 | -echo '----------zh tts----------' | ||
| 95 | -./run-vits-zh.sh | ||
| 96 | -rm -rf sherpa-onnx-* | ||
| 97 | - | ||
| 98 | -ls -lh *.wav | ||
| 99 | - | ||
| 100 | -popd # tts | ||
| 101 | - | ||
| 102 | pushd speaker-diarization | 103 | pushd speaker-diarization |
| 103 | echo '----------speaker diarization----------' | 104 | echo '----------speaker diarization----------' |
| 104 | ./run.sh | 105 | ./run.sh |
dart-api-examples/tts/bin/kitten-en.dart
0 → 100644
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | + | ||
| 7 | +import './init.dart'; | ||
| 8 | + | ||
| 9 | +void main(List<String> arguments) async { | ||
| 10 | + await initSherpaOnnx(); | ||
| 11 | + | ||
| 12 | + final parser = ArgParser() | ||
| 13 | + ..addOption('model', help: 'Path to the onnx model') | ||
| 14 | + ..addOption('voices', help: 'Path to the voices.bin') | ||
| 15 | + ..addOption('tokens', help: 'Path to tokens.txt') | ||
| 16 | + ..addOption( | ||
| 17 | + 'data-dir', | ||
| 18 | + help: 'Path to espeak-ng-data directory', | ||
| 19 | + defaultsTo: '', | ||
| 20 | + ) | ||
| 21 | + ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '') | ||
| 22 | + ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '') | ||
| 23 | + ..addOption('text', help: 'Text to generate TTS for') | ||
| 24 | + ..addOption('output-wav', help: 'Filename to save the generated audio') | ||
| 25 | + ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0') | ||
| 26 | + ..addOption( | ||
| 27 | + 'sid', | ||
| 28 | + help: 'Speaker ID to select. Used only for multi-speaker TTS', | ||
| 29 | + defaultsTo: '0', | ||
| 30 | + ); | ||
| 31 | + final res = parser.parse(arguments); | ||
| 32 | + if (res['model'] == null || | ||
| 33 | + res['voices'] == null || | ||
| 34 | + res['tokens'] == null || | ||
| 35 | + res['data-dir'] == null || | ||
| 36 | + res['output-wav'] == null || | ||
| 37 | + res['text'] == null) { | ||
| 38 | + print(parser.usage); | ||
| 39 | + exit(1); | ||
| 40 | + } | ||
| 41 | + final model = res['model'] as String; | ||
| 42 | + final voices = res['voices'] as String; | ||
| 43 | + final tokens = res['tokens'] as String; | ||
| 44 | + final dataDir = res['data-dir'] as String; | ||
| 45 | + final ruleFsts = res['rule-fsts'] as String; | ||
| 46 | + final ruleFars = res['rule-fars'] as String; | ||
| 47 | + final text = res['text'] as String; | ||
| 48 | + final outputWav = res['output-wav'] as String; | ||
| 49 | + var speed = double.tryParse(res['speed'] as String) ?? 1.0; | ||
| 50 | + final sid = int.tryParse(res['sid'] as String) ?? 0; | ||
| 51 | + | ||
| 52 | + if (speed == 0) { | ||
| 53 | + speed = 1.0; | ||
| 54 | + } | ||
| 55 | + | ||
| 56 | + final kitten = sherpa_onnx.OfflineTtsKittenModelConfig( | ||
| 57 | + model: model, | ||
| 58 | + voices: voices, | ||
| 59 | + tokens: tokens, | ||
| 60 | + dataDir: dataDir, | ||
| 61 | + lengthScale: 1 / speed, | ||
| 62 | + ); | ||
| 63 | + | ||
| 64 | + final modelConfig = sherpa_onnx.OfflineTtsModelConfig( | ||
| 65 | + kitten: kitten, | ||
| 66 | + numThreads: 1, | ||
| 67 | + debug: true, | ||
| 68 | + ); | ||
| 69 | + final config = sherpa_onnx.OfflineTtsConfig( | ||
| 70 | + model: modelConfig, | ||
| 71 | + maxNumSenetences: 1, | ||
| 72 | + ruleFsts: ruleFsts, | ||
| 73 | + ruleFars: ruleFars, | ||
| 74 | + ); | ||
| 75 | + | ||
| 76 | + final tts = sherpa_onnx.OfflineTts(config); | ||
| 77 | + final audio = tts.generate(text: text, sid: sid, speed: speed); | ||
| 78 | + tts.free(); | ||
| 79 | + | ||
| 80 | + sherpa_onnx.writeWave( | ||
| 81 | + filename: outputWav, | ||
| 82 | + samples: audio.samples, | ||
| 83 | + sampleRate: audio.sampleRate, | ||
| 84 | + ); | ||
| 85 | + print('Saved to $outputWav'); | ||
| 86 | +} |
dart-api-examples/tts/run-kitten-en.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +# please visit | ||
| 8 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html | ||
| 9 | +# to download more models | ||
| 10 | +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then | ||
| 11 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 12 | + tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 13 | + rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 14 | +fi | ||
| 15 | + | ||
| 16 | +dart run \ | ||
| 17 | + ./bin/kitten-en.dart \ | ||
| 18 | + --model ./kitten-nano-en-v0_1-fp16/model.fp16.onnx \ | ||
| 19 | + --voices ./kitten-nano-en-v0_1-fp16/voices.bin \ | ||
| 20 | + --tokens ./kitten-nano-en-v0_1-fp16/tokens.txt \ | ||
| 21 | + --data-dir ./kitten-nano-en-v0_1-fp16/espeak-ng-data \ | ||
| 22 | + --sid 0 \ | ||
| 23 | + --speed 1.0 \ | ||
| 24 | + --output-wav kitten-en-0.wav \ | ||
| 25 | + --text "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." | ||
| 26 | + | ||
| 27 | +ls -lh *.wav |
| @@ -150,9 +150,18 @@ class AudioTagging { | @@ -150,9 +150,18 @@ class AudioTagging { | ||
| 150 | final labelsPtr = config.labels.toNativeUtf8(); | 150 | final labelsPtr = config.labels.toNativeUtf8(); |
| 151 | c.ref.labels = labelsPtr; | 151 | c.ref.labels = labelsPtr; |
| 152 | 152 | ||
| 153 | + if (SherpaOnnxBindings.sherpaOnnxCreateAudioTagging == null) { | ||
| 154 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 155 | + } | ||
| 156 | + | ||
| 153 | final ptr = | 157 | final ptr = |
| 154 | SherpaOnnxBindings.sherpaOnnxCreateAudioTagging?.call(c) ?? nullptr; | 158 | SherpaOnnxBindings.sherpaOnnxCreateAudioTagging?.call(c) ?? nullptr; |
| 155 | 159 | ||
| 160 | + if (ptr == nullptr) { | ||
| 161 | + throw Exception( | ||
| 162 | + "Failed to create audio tagging. Please check your config"); | ||
| 163 | + } | ||
| 164 | + | ||
| 156 | calloc.free(labelsPtr); | 165 | calloc.free(labelsPtr); |
| 157 | calloc.free(providerPtr); | 166 | calloc.free(providerPtr); |
| 158 | calloc.free(cedPtr); | 167 | calloc.free(cedPtr); |
| @@ -140,8 +140,16 @@ class KeywordSpotter { | @@ -140,8 +140,16 @@ class KeywordSpotter { | ||
| 140 | c.ref.keywordsBuf = config.keywordsBuf.toNativeUtf8(); | 140 | c.ref.keywordsBuf = config.keywordsBuf.toNativeUtf8(); |
| 141 | c.ref.keywordsBufSize = config.keywordsBufSize; | 141 | c.ref.keywordsBufSize = config.keywordsBufSize; |
| 142 | 142 | ||
| 143 | + if (SherpaOnnxBindings.createKeywordSpotter == null) { | ||
| 144 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 145 | + } | ||
| 146 | + | ||
| 143 | final ptr = SherpaOnnxBindings.createKeywordSpotter?.call(c) ?? nullptr; | 147 | final ptr = SherpaOnnxBindings.createKeywordSpotter?.call(c) ?? nullptr; |
| 144 | 148 | ||
| 149 | + if (ptr == nullptr) { | ||
| 150 | + throw Exception("Failed to create kws. Please check your config"); | ||
| 151 | + } | ||
| 152 | + | ||
| 145 | calloc.free(c.ref.keywordsBuf); | 153 | calloc.free(c.ref.keywordsBuf); |
| 146 | calloc.free(c.ref.keywordsFile); | 154 | calloc.free(c.ref.keywordsFile); |
| 147 | calloc.free(c.ref.model.bpeVocab); | 155 | calloc.free(c.ref.model.bpeVocab); |
| @@ -79,10 +79,19 @@ class OfflinePunctuation { | @@ -79,10 +79,19 @@ class OfflinePunctuation { | ||
| 79 | final providerPtr = config.model.provider.toNativeUtf8(); | 79 | final providerPtr = config.model.provider.toNativeUtf8(); |
| 80 | c.ref.model.provider = providerPtr; | 80 | c.ref.model.provider = providerPtr; |
| 81 | 81 | ||
| 82 | + if (SherpaOnnxBindings.sherpaOnnxCreateOfflinePunctuation == null) { | ||
| 83 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 84 | + } | ||
| 85 | + | ||
| 82 | final ptr = | 86 | final ptr = |
| 83 | SherpaOnnxBindings.sherpaOnnxCreateOfflinePunctuation?.call(c) ?? | 87 | SherpaOnnxBindings.sherpaOnnxCreateOfflinePunctuation?.call(c) ?? |
| 84 | nullptr; | 88 | nullptr; |
| 85 | 89 | ||
| 90 | + if (ptr == nullptr) { | ||
| 91 | + throw Exception( | ||
| 92 | + "Failed to create offline punctuation. Please check your config"); | ||
| 93 | + } | ||
| 94 | + | ||
| 86 | calloc.free(providerPtr); | 95 | calloc.free(providerPtr); |
| 87 | calloc.free(ctTransformerPtr); | 96 | calloc.free(ctTransformerPtr); |
| 88 | calloc.free(c); | 97 | calloc.free(c); |
| @@ -598,8 +598,17 @@ class OfflineRecognizer { | @@ -598,8 +598,17 @@ class OfflineRecognizer { | ||
| 598 | factory OfflineRecognizer(OfflineRecognizerConfig config) { | 598 | factory OfflineRecognizer(OfflineRecognizerConfig config) { |
| 599 | final c = convertConfig(config); | 599 | final c = convertConfig(config); |
| 600 | 600 | ||
| 601 | + if (SherpaOnnxBindings.createOfflineRecognizer == null) { | ||
| 602 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 603 | + } | ||
| 604 | + | ||
| 601 | final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; | 605 | final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; |
| 602 | 606 | ||
| 607 | + if (ptr == nullptr) { | ||
| 608 | + throw Exception( | ||
| 609 | + "Failed to create offline recognizer. Please check your config"); | ||
| 610 | + } | ||
| 611 | + | ||
| 603 | freeConfig(c); | 612 | freeConfig(c); |
| 604 | 613 | ||
| 605 | return OfflineRecognizer._(ptr: ptr, config: config); | 614 | return OfflineRecognizer._(ptr: ptr, config: config); |
| @@ -211,10 +211,19 @@ class OfflineSpeakerDiarization { | @@ -211,10 +211,19 @@ class OfflineSpeakerDiarization { | ||
| 211 | c.ref.minDurationOn = config.minDurationOn; | 211 | c.ref.minDurationOn = config.minDurationOn; |
| 212 | c.ref.minDurationOff = config.minDurationOff; | 212 | c.ref.minDurationOff = config.minDurationOff; |
| 213 | 213 | ||
| 214 | + if (SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization == null) { | ||
| 215 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 216 | + } | ||
| 217 | + | ||
| 214 | final ptr = | 218 | final ptr = |
| 215 | SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ?? | 219 | SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ?? |
| 216 | nullptr; | 220 | nullptr; |
| 217 | 221 | ||
| 222 | + if (ptr == nullptr) { | ||
| 223 | + throw Exception( | ||
| 224 | + "Failed to create offline speaker diarization. Please check your config"); | ||
| 225 | + } | ||
| 226 | + | ||
| 218 | calloc.free(c.ref.embedding.provider); | 227 | calloc.free(c.ref.embedding.provider); |
| 219 | calloc.free(c.ref.embedding.model); | 228 | calloc.free(c.ref.embedding.model); |
| 220 | calloc.free(c.ref.segmentation.provider); | 229 | calloc.free(c.ref.segmentation.provider); |
| @@ -118,10 +118,19 @@ class OfflineSpeechDenoiser { | @@ -118,10 +118,19 @@ class OfflineSpeechDenoiser { | ||
| 118 | c.ref.model.debug = config.model.debug ? 1 : 0; | 118 | c.ref.model.debug = config.model.debug ? 1 : 0; |
| 119 | c.ref.model.provider = config.model.provider.toNativeUtf8(); | 119 | c.ref.model.provider = config.model.provider.toNativeUtf8(); |
| 120 | 120 | ||
| 121 | + if (SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeechDenoiser == null) { | ||
| 122 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 123 | + } | ||
| 124 | + | ||
| 121 | final ptr = | 125 | final ptr = |
| 122 | SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeechDenoiser?.call(c) ?? | 126 | SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeechDenoiser?.call(c) ?? |
| 123 | nullptr; | 127 | nullptr; |
| 124 | 128 | ||
| 129 | + if (ptr == nullptr) { | ||
| 130 | + throw Exception( | ||
| 131 | + "Failed to create offline speech denoiser. Please check your config"); | ||
| 132 | + } | ||
| 133 | + | ||
| 125 | calloc.free(c.ref.model.provider); | 134 | calloc.free(c.ref.model.provider); |
| 126 | calloc.free(c.ref.model.gtcrn.model); | 135 | calloc.free(c.ref.model.gtcrn.model); |
| 127 | 136 |
| @@ -89,9 +89,18 @@ class OnlinePunctuation { | @@ -89,9 +89,18 @@ class OnlinePunctuation { | ||
| 89 | final providerPtr = config.model.provider.toNativeUtf8(); | 89 | final providerPtr = config.model.provider.toNativeUtf8(); |
| 90 | c.ref.model.provider = providerPtr; | 90 | c.ref.model.provider = providerPtr; |
| 91 | 91 | ||
| 92 | + if (SherpaOnnxBindings.sherpaOnnxCreateOnlinePunctuation == null) { | ||
| 93 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 94 | + } | ||
| 95 | + | ||
| 92 | final ptr = SherpaOnnxBindings.sherpaOnnxCreateOnlinePunctuation?.call(c) ?? | 96 | final ptr = SherpaOnnxBindings.sherpaOnnxCreateOnlinePunctuation?.call(c) ?? |
| 93 | nullptr; | 97 | nullptr; |
| 94 | 98 | ||
| 99 | + if (ptr == nullptr) { | ||
| 100 | + throw Exception( | ||
| 101 | + "Failed to create online punctuation. Please check your config"); | ||
| 102 | + } | ||
| 103 | + | ||
| 95 | // Free the allocated strings and struct memory | 104 | // Free the allocated strings and struct memory |
| 96 | calloc.free(providerPtr); | 105 | calloc.free(providerPtr); |
| 97 | calloc.free(cnnBiLstmPtr); | 106 | calloc.free(cnnBiLstmPtr); |
| @@ -391,8 +391,17 @@ class OnlineRecognizer { | @@ -391,8 +391,17 @@ class OnlineRecognizer { | ||
| 391 | c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8(); | 391 | c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8(); |
| 392 | c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8(); | 392 | c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8(); |
| 393 | 393 | ||
| 394 | + if (SherpaOnnxBindings.createOnlineRecognizer == null) { | ||
| 395 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 396 | + } | ||
| 397 | + | ||
| 394 | final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr; | 398 | final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr; |
| 395 | 399 | ||
| 400 | + if (ptr == nullptr) { | ||
| 401 | + throw Exception( | ||
| 402 | + "Failed to create online recognizer. Please check your config"); | ||
| 403 | + } | ||
| 404 | + | ||
| 396 | calloc.free(c.ref.hr.dictDir); | 405 | calloc.free(c.ref.hr.dictDir); |
| 397 | calloc.free(c.ref.hr.lexicon); | 406 | calloc.free(c.ref.hr.lexicon); |
| 398 | calloc.free(c.ref.hr.ruleFsts); | 407 | calloc.free(c.ref.hr.ruleFsts); |
| @@ -204,6 +204,16 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct { | @@ -204,6 +204,16 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct { | ||
| 204 | external Pointer<Utf8> lang; | 204 | external Pointer<Utf8> lang; |
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | +final class SherpaOnnxOfflineTtsKittenModelConfig extends Struct { | ||
| 208 | + external Pointer<Utf8> model; | ||
| 209 | + external Pointer<Utf8> voices; | ||
| 210 | + external Pointer<Utf8> tokens; | ||
| 211 | + external Pointer<Utf8> dataDir; | ||
| 212 | + | ||
| 213 | + @Float() | ||
| 214 | + external double lengthScale; | ||
| 215 | +} | ||
| 216 | + | ||
| 207 | final class SherpaOnnxOfflineTtsModelConfig extends Struct { | 217 | final class SherpaOnnxOfflineTtsModelConfig extends Struct { |
| 208 | external SherpaOnnxOfflineTtsVitsModelConfig vits; | 218 | external SherpaOnnxOfflineTtsVitsModelConfig vits; |
| 209 | @Int32() | 219 | @Int32() |
| @@ -215,6 +225,7 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct { | @@ -215,6 +225,7 @@ final class SherpaOnnxOfflineTtsModelConfig extends Struct { | ||
| 215 | external Pointer<Utf8> provider; | 225 | external Pointer<Utf8> provider; |
| 216 | external SherpaOnnxOfflineTtsMatchaModelConfig matcha; | 226 | external SherpaOnnxOfflineTtsMatchaModelConfig matcha; |
| 217 | external SherpaOnnxOfflineTtsKokoroModelConfig kokoro; | 227 | external SherpaOnnxOfflineTtsKokoroModelConfig kokoro; |
| 228 | + external SherpaOnnxOfflineTtsKittenModelConfig kitten; | ||
| 218 | } | 229 | } |
| 219 | 230 | ||
| 220 | final class SherpaOnnxOfflineTtsConfig extends Struct { | 231 | final class SherpaOnnxOfflineTtsConfig extends Struct { |
| @@ -60,9 +60,18 @@ class SpeakerEmbeddingExtractor { | @@ -60,9 +60,18 @@ class SpeakerEmbeddingExtractor { | ||
| 60 | final providerPtr = config.provider.toNativeUtf8(); | 60 | final providerPtr = config.provider.toNativeUtf8(); |
| 61 | c.ref.provider = providerPtr; | 61 | c.ref.provider = providerPtr; |
| 62 | 62 | ||
| 63 | + if (SherpaOnnxBindings.createSpeakerEmbeddingExtractor == null) { | ||
| 64 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 65 | + } | ||
| 66 | + | ||
| 63 | final ptr = | 67 | final ptr = |
| 64 | SherpaOnnxBindings.createSpeakerEmbeddingExtractor?.call(c) ?? nullptr; | 68 | SherpaOnnxBindings.createSpeakerEmbeddingExtractor?.call(c) ?? nullptr; |
| 65 | 69 | ||
| 70 | + if (ptr == nullptr) { | ||
| 71 | + throw Exception( | ||
| 72 | + "Failed to create speaker embedding extractor. Please check your config"); | ||
| 73 | + } | ||
| 74 | + | ||
| 66 | calloc.free(providerPtr); | 75 | calloc.free(providerPtr); |
| 67 | calloc.free(modelPtr); | 76 | calloc.free(modelPtr); |
| 68 | calloc.free(c); | 77 | calloc.free(c); |
| @@ -159,11 +159,51 @@ class OfflineTtsKokoroModelConfig { | @@ -159,11 +159,51 @@ class OfflineTtsKokoroModelConfig { | ||
| 159 | final String lang; | 159 | final String lang; |
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | +class OfflineTtsKittenModelConfig { | ||
| 163 | + const OfflineTtsKittenModelConfig({ | ||
| 164 | + this.model = '', | ||
| 165 | + this.voices = '', | ||
| 166 | + this.tokens = '', | ||
| 167 | + this.dataDir = '', | ||
| 168 | + this.lengthScale = 1.0, | ||
| 169 | + }); | ||
| 170 | + | ||
| 171 | + factory OfflineTtsKittenModelConfig.fromJson(Map<String, dynamic> json) { | ||
| 172 | + return OfflineTtsKittenModelConfig( | ||
| 173 | + model: json['model'] as String? ?? '', | ||
| 174 | + voices: json['voices'] as String? ?? '', | ||
| 175 | + tokens: json['tokens'] as String? ?? '', | ||
| 176 | + dataDir: json['dataDir'] as String? ?? '', | ||
| 177 | + lengthScale: (json['lengthScale'] as num?)?.toDouble() ?? 1.0, | ||
| 178 | + ); | ||
| 179 | + } | ||
| 180 | + | ||
| 181 | + @override | ||
| 182 | + String toString() { | ||
| 183 | + return 'OfflineTtsKittenModelConfig(model: $model, voices: $voices, tokens: $tokens, dataDir: $dataDir, lengthScale: $lengthScale)'; | ||
| 184 | + } | ||
| 185 | + | ||
| 186 | + Map<String, dynamic> toJson() => { | ||
| 187 | + 'model': model, | ||
| 188 | + 'voices': voices, | ||
| 189 | + 'tokens': tokens, | ||
| 190 | + 'dataDir': dataDir, | ||
| 191 | + 'lengthScale': lengthScale, | ||
| 192 | + }; | ||
| 193 | + | ||
| 194 | + final String model; | ||
| 195 | + final String voices; | ||
| 196 | + final String tokens; | ||
| 197 | + final String dataDir; | ||
| 198 | + final double lengthScale; | ||
| 199 | +} | ||
| 200 | + | ||
| 162 | class OfflineTtsModelConfig { | 201 | class OfflineTtsModelConfig { |
| 163 | const OfflineTtsModelConfig({ | 202 | const OfflineTtsModelConfig({ |
| 164 | this.vits = const OfflineTtsVitsModelConfig(), | 203 | this.vits = const OfflineTtsVitsModelConfig(), |
| 165 | this.matcha = const OfflineTtsMatchaModelConfig(), | 204 | this.matcha = const OfflineTtsMatchaModelConfig(), |
| 166 | this.kokoro = const OfflineTtsKokoroModelConfig(), | 205 | this.kokoro = const OfflineTtsKokoroModelConfig(), |
| 206 | + this.kitten = const OfflineTtsKittenModelConfig(), | ||
| 167 | this.numThreads = 1, | 207 | this.numThreads = 1, |
| 168 | this.debug = true, | 208 | this.debug = true, |
| 169 | this.provider = 'cpu', | 209 | this.provider = 'cpu', |
| @@ -177,6 +217,8 @@ class OfflineTtsModelConfig { | @@ -177,6 +217,8 @@ class OfflineTtsModelConfig { | ||
| 177 | json['matcha'] as Map<String, dynamic>? ?? const {}), | 217 | json['matcha'] as Map<String, dynamic>? ?? const {}), |
| 178 | kokoro: OfflineTtsKokoroModelConfig.fromJson( | 218 | kokoro: OfflineTtsKokoroModelConfig.fromJson( |
| 179 | json['kokoro'] as Map<String, dynamic>? ?? const {}), | 219 | json['kokoro'] as Map<String, dynamic>? ?? const {}), |
| 220 | + kitten: OfflineTtsKittenModelConfig.fromJson( | ||
| 221 | + json['kitten'] as Map<String, dynamic>? ?? const {}), | ||
| 180 | numThreads: json['numThreads'] as int? ?? 1, | 222 | numThreads: json['numThreads'] as int? ?? 1, |
| 181 | debug: json['debug'] as bool? ?? true, | 223 | debug: json['debug'] as bool? ?? true, |
| 182 | provider: json['provider'] as String? ?? 'cpu', | 224 | provider: json['provider'] as String? ?? 'cpu', |
| @@ -185,13 +227,14 @@ class OfflineTtsModelConfig { | @@ -185,13 +227,14 @@ class OfflineTtsModelConfig { | ||
| 185 | 227 | ||
| 186 | @override | 228 | @override |
| 187 | String toString() { | 229 | String toString() { |
| 188 | - return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, kokoro: $kokoro, numThreads: $numThreads, debug: $debug, provider: $provider)'; | 230 | + return 'OfflineTtsModelConfig(vits: $vits, matcha: $matcha, kokoro: $kokoro, kitten: $kitten, numThreads: $numThreads, debug: $debug, provider: $provider)'; |
| 189 | } | 231 | } |
| 190 | 232 | ||
| 191 | Map<String, dynamic> toJson() => { | 233 | Map<String, dynamic> toJson() => { |
| 192 | 'vits': vits.toJson(), | 234 | 'vits': vits.toJson(), |
| 193 | 'matcha': matcha.toJson(), | 235 | 'matcha': matcha.toJson(), |
| 194 | 'kokoro': kokoro.toJson(), | 236 | 'kokoro': kokoro.toJson(), |
| 237 | + 'kitten': kitten.toJson(), | ||
| 195 | 'numThreads': numThreads, | 238 | 'numThreads': numThreads, |
| 196 | 'debug': debug, | 239 | 'debug': debug, |
| 197 | 'provider': provider, | 240 | 'provider': provider, |
| @@ -200,6 +243,7 @@ class OfflineTtsModelConfig { | @@ -200,6 +243,7 @@ class OfflineTtsModelConfig { | ||
| 200 | final OfflineTtsVitsModelConfig vits; | 243 | final OfflineTtsVitsModelConfig vits; |
| 201 | final OfflineTtsMatchaModelConfig matcha; | 244 | final OfflineTtsMatchaModelConfig matcha; |
| 202 | final OfflineTtsKokoroModelConfig kokoro; | 245 | final OfflineTtsKokoroModelConfig kokoro; |
| 246 | + final OfflineTtsKittenModelConfig kitten; | ||
| 203 | final int numThreads; | 247 | final int numThreads; |
| 204 | final bool debug; | 248 | final bool debug; |
| 205 | final String provider; | 249 | final String provider; |
| @@ -292,6 +336,12 @@ class OfflineTts { | @@ -292,6 +336,12 @@ class OfflineTts { | ||
| 292 | c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8(); | 336 | c.ref.model.kokoro.lexicon = config.model.kokoro.lexicon.toNativeUtf8(); |
| 293 | c.ref.model.kokoro.lang = config.model.kokoro.lang.toNativeUtf8(); | 337 | c.ref.model.kokoro.lang = config.model.kokoro.lang.toNativeUtf8(); |
| 294 | 338 | ||
| 339 | + c.ref.model.kitten.model = config.model.kitten.model.toNativeUtf8(); | ||
| 340 | + c.ref.model.kitten.voices = config.model.kitten.voices.toNativeUtf8(); | ||
| 341 | + c.ref.model.kitten.tokens = config.model.kitten.tokens.toNativeUtf8(); | ||
| 342 | + c.ref.model.kitten.dataDir = config.model.kitten.dataDir.toNativeUtf8(); | ||
| 343 | + c.ref.model.kitten.lengthScale = config.model.kitten.lengthScale; | ||
| 344 | + | ||
| 295 | c.ref.model.numThreads = config.model.numThreads; | 345 | c.ref.model.numThreads = config.model.numThreads; |
| 296 | c.ref.model.debug = config.model.debug ? 1 : 0; | 346 | c.ref.model.debug = config.model.debug ? 1 : 0; |
| 297 | c.ref.model.provider = config.model.provider.toNativeUtf8(); | 347 | c.ref.model.provider = config.model.provider.toNativeUtf8(); |
| @@ -301,12 +351,25 @@ class OfflineTts { | @@ -301,12 +351,25 @@ class OfflineTts { | ||
| 301 | c.ref.ruleFars = config.ruleFars.toNativeUtf8(); | 351 | c.ref.ruleFars = config.ruleFars.toNativeUtf8(); |
| 302 | c.ref.silenceScale = config.silenceScale; | 352 | c.ref.silenceScale = config.silenceScale; |
| 303 | 353 | ||
| 354 | + if (SherpaOnnxBindings.createOfflineTts == null) { | ||
| 355 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 356 | + } | ||
| 357 | + | ||
| 304 | final ptr = SherpaOnnxBindings.createOfflineTts?.call(c) ?? nullptr; | 358 | final ptr = SherpaOnnxBindings.createOfflineTts?.call(c) ?? nullptr; |
| 305 | 359 | ||
| 360 | + if (ptr == nullptr) { | ||
| 361 | + throw Exception("Failed to create offline tts. Please check your config"); | ||
| 362 | + } | ||
| 363 | + | ||
| 306 | calloc.free(c.ref.ruleFars); | 364 | calloc.free(c.ref.ruleFars); |
| 307 | calloc.free(c.ref.ruleFsts); | 365 | calloc.free(c.ref.ruleFsts); |
| 308 | calloc.free(c.ref.model.provider); | 366 | calloc.free(c.ref.model.provider); |
| 309 | 367 | ||
| 368 | + calloc.free(c.ref.model.kitten.dataDir); | ||
| 369 | + calloc.free(c.ref.model.kitten.tokens); | ||
| 370 | + calloc.free(c.ref.model.kitten.voices); | ||
| 371 | + calloc.free(c.ref.model.kitten.model); | ||
| 372 | + | ||
| 310 | calloc.free(c.ref.model.kokoro.lang); | 373 | calloc.free(c.ref.model.kokoro.lang); |
| 311 | calloc.free(c.ref.model.kokoro.lexicon); | 374 | calloc.free(c.ref.model.kokoro.lexicon); |
| 312 | calloc.free(c.ref.model.kokoro.dictDir); | 375 | calloc.free(c.ref.model.kokoro.dictDir); |
| @@ -153,9 +153,19 @@ class CircularBuffer { | @@ -153,9 +153,19 @@ class CircularBuffer { | ||
| 153 | /// to avoid memory leak. | 153 | /// to avoid memory leak. |
| 154 | factory CircularBuffer({required int capacity}) { | 154 | factory CircularBuffer({required int capacity}) { |
| 155 | assert(capacity > 0, 'capacity is $capacity'); | 155 | assert(capacity > 0, 'capacity is $capacity'); |
| 156 | + | ||
| 157 | + if (SherpaOnnxBindings.createCircularBuffer == null) { | ||
| 158 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 159 | + } | ||
| 160 | + | ||
| 156 | final p = | 161 | final p = |
| 157 | SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr; | 162 | SherpaOnnxBindings.createCircularBuffer?.call(capacity) ?? nullptr; |
| 158 | 163 | ||
| 164 | + if (p == nullptr) { | ||
| 165 | + throw Exception( | ||
| 166 | + "Failed to create circular buffer. Please check your config"); | ||
| 167 | + } | ||
| 168 | + | ||
| 159 | return CircularBuffer._(ptr: p); | 169 | return CircularBuffer._(ptr: p); |
| 160 | } | 170 | } |
| 161 | 171 | ||
| @@ -243,10 +253,18 @@ class VoiceActivityDetector { | @@ -243,10 +253,18 @@ class VoiceActivityDetector { | ||
| 243 | 253 | ||
| 244 | c.ref.debug = config.debug ? 1 : 0; | 254 | c.ref.debug = config.debug ? 1 : 0; |
| 245 | 255 | ||
| 256 | + if (SherpaOnnxBindings.createVoiceActivityDetector == null) { | ||
| 257 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 258 | + } | ||
| 259 | + | ||
| 246 | final ptr = SherpaOnnxBindings.createVoiceActivityDetector | 260 | final ptr = SherpaOnnxBindings.createVoiceActivityDetector |
| 247 | ?.call(c, bufferSizeInSeconds) ?? | 261 | ?.call(c, bufferSizeInSeconds) ?? |
| 248 | nullptr; | 262 | nullptr; |
| 249 | 263 | ||
| 264 | + if (ptr == nullptr) { | ||
| 265 | + throw Exception("Failed to create vad. Please check your config"); | ||
| 266 | + } | ||
| 267 | + | ||
| 250 | calloc.free(providerPtr); | 268 | calloc.free(providerPtr); |
| 251 | calloc.free(tenVadModelPtr); | 269 | calloc.free(tenVadModelPtr); |
| 252 | calloc.free(sileroVadModelPtr); | 270 | calloc.free(sileroVadModelPtr); |
| @@ -15,6 +15,11 @@ class WaveData { | @@ -15,6 +15,11 @@ class WaveData { | ||
| 15 | 15 | ||
| 16 | WaveData readWave(String filename) { | 16 | WaveData readWave(String filename) { |
| 17 | final Pointer<Utf8> str = filename.toNativeUtf8(); | 17 | final Pointer<Utf8> str = filename.toNativeUtf8(); |
| 18 | + | ||
| 19 | + if (SherpaOnnxBindings.readWave == null) { | ||
| 20 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 21 | + } | ||
| 22 | + | ||
| 18 | Pointer<SherpaOnnxWave> wave = | 23 | Pointer<SherpaOnnxWave> wave = |
| 19 | SherpaOnnxBindings.readWave?.call(str) ?? nullptr; | 24 | SherpaOnnxBindings.readWave?.call(str) ?? nullptr; |
| 20 | calloc.free(str); | 25 | calloc.free(str); |
| @@ -17,6 +17,10 @@ bool writeWave( | @@ -17,6 +17,10 @@ bool writeWave( | ||
| 17 | final pList = p.asTypedList(n); | 17 | final pList = p.asTypedList(n); |
| 18 | pList.setAll(0, samples); | 18 | pList.setAll(0, samples); |
| 19 | 19 | ||
| 20 | + if (SherpaOnnxBindings.writeWave == null) { | ||
| 21 | + throw Exception("Please initialize sherpa-onnx first"); | ||
| 22 | + } | ||
| 23 | + | ||
| 20 | int ok = | 24 | int ok = |
| 21 | SherpaOnnxBindings.writeWave?.call(p, n, sampleRate, filenamePtr) ?? 0; | 25 | SherpaOnnxBindings.writeWave?.call(p, n, sampleRate, filenamePtr) ?? 0; |
| 22 | 26 |
-
请 注册 或 登录 后发表评论