Committed by
GitHub
Add TTS API and examples for Dart (#1010)
正在显示
20 个修改的文件
包含
874 行增加
和
0 行删除
| @@ -4,6 +4,22 @@ set -ex | @@ -4,6 +4,22 @@ set -ex | ||
| 4 | 4 | ||
| 5 | cd dart-api-examples | 5 | cd dart-api-examples |
| 6 | 6 | ||
| 7 | +pushd tts | ||
| 8 | + | ||
| 9 | +echo '----------piper tts----------' | ||
| 10 | +./run-piper.sh | ||
| 11 | +rm -rf vits-piper-* | ||
| 12 | + | ||
| 13 | +echo '----------coqui tts----------' | ||
| 14 | +./run-coqui.sh | ||
| 15 | +rm -rf vits-coqui-* | ||
| 16 | + | ||
| 17 | +echo '----------zh tts----------' | ||
| 18 | +./run-zh.sh | ||
| 19 | +rm -rf sherpa-onnx-* | ||
| 20 | + | ||
| 21 | +popd # tts | ||
| 22 | + | ||
| 7 | pushd streaming-asr | 23 | pushd streaming-asr |
| 8 | 24 | ||
| 9 | echo '----------streaming zipformer ctc HLG----------' | 25 | echo '----------streaming zipformer ctc HLG----------' |
| @@ -92,5 +92,6 @@ jobs: | @@ -92,5 +92,6 @@ jobs: | ||
| 92 | cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml | 92 | cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml |
| 93 | cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml | 93 | cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml |
| 94 | cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml | 94 | cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml |
| 95 | + cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml | ||
| 95 | 96 | ||
| 96 | .github/scripts/test-dart.sh | 97 | .github/scripts/test-dart.sh |
| 1 | # Introduction | 1 | # Introduction |
| 2 | 2 | ||
| 3 | This folder contains examples for streaming ASR with the Dart API. | 3 | This folder contains examples for streaming ASR with the Dart API. |
| 4 | + | ||
| 4 | | File | Description| | 5 | | File | Description| |
| 5 | |------|------------| | 6 | |------|------------| |
| 6 | |[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)| | 7 | |[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)| |
dart-api-examples/tts/.gitignore
0 → 100644
dart-api-examples/tts/CHANGELOG.md
0 → 100644
dart-api-examples/tts/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +This folder contains examples for text-to-speech (TTS) with the Dart API. | ||
| 4 | + | ||
| 5 | +| File | Description| | ||
| 6 | +|------|------------| | ||
| 7 | +|[./bin/piper.dart](./bin/piper.dart)| Use a Piper TTS model for text-to-speech. See [./run-piper.sh](./run-piper.sh)| | ||
| 8 | +|[./bin/coqui.dart](./bin/coqui.dart)| Use a Coqui TTS model for text-to-speech. See [./run-coqui.sh](./run-coqui.sh)| | ||
| 9 | +|[./bin/zh.dart](./bin/zh.dart)| Use a Chinese VITS TTS model for text-to-speech. See [./run-zh.sh](./run-zh.sh)| | ||
| 10 | + |
dart-api-examples/tts/analysis_options.yaml
0 → 100644
| 1 | +# This file configures the static analysis results for your project (errors, | ||
| 2 | +# warnings, and lints). | ||
| 3 | +# | ||
| 4 | +# This enables the 'recommended' set of lints from `package:lints`. | ||
| 5 | +# This set helps identify many issues that may lead to problems when running | ||
| 6 | +# or consuming Dart code, and enforces writing Dart using a single, idiomatic | ||
| 7 | +# style and format. | ||
| 8 | +# | ||
| 9 | +# If you want a smaller set of lints you can change this to specify | ||
| 10 | +# 'package:lints/core.yaml'. These are just the most critical lints | ||
| 11 | +# (the recommended set includes the core lints). | ||
| 12 | +# The core lints are also what is used by pub.dev for scoring packages. | ||
| 13 | + | ||
| 14 | +include: package:lints/recommended.yaml | ||
| 15 | + | ||
| 16 | +# Uncomment the following section to specify additional rules. | ||
| 17 | + | ||
| 18 | +# linter: | ||
| 19 | +# rules: | ||
| 20 | +# - camel_case_types | ||
| 21 | + | ||
| 22 | +# analyzer: | ||
| 23 | +# exclude: | ||
| 24 | +# - path/to/excluded/files/** | ||
| 25 | + | ||
| 26 | +# For more information about the core and recommended set of lints, see | ||
| 27 | +# https://dart.dev/go/core-lints | ||
| 28 | + | ||
| 29 | +# For additional information about configuring this file, see | ||
| 30 | +# https://dart.dev/guides/language/analysis-options |
dart-api-examples/tts/bin/coqui.dart
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | +import 'dart:typed_data'; | ||
| 4 | + | ||
| 5 | +import 'package:args/args.dart'; | ||
| 6 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 7 | + | ||
| 8 | +import './init.dart'; | ||
| 9 | + | ||
/// Generates speech from text with a Coqui VITS model and saves it as a wave
/// file.
///
/// Required options: `--model`, `--tokens`, `--text` and `--output-wav`.
/// Optional options: `--speed` (default `1.0`) and `--sid` (default `0`).
/// When a required option is missing, the usage text is printed and the
/// process exits with status 1.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // Anything that is not a finite, positive number (unparsable, 0, negative,
  // NaN, infinity) falls back to 1.0 so that `1 / speed` below stays finite
  // and positive.
  final requestedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed =
      (requestedSpeed.isFinite && requestedSpeed > 0) ? requestedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    // Release the TTS object even when generation or writing throws.
    tts.free();
  }
  print('Saved to $outputWav');
}
dart-api-examples/tts/bin/init.dart
0 → 120000
| 1 | +../../vad/bin/init.dart |
dart-api-examples/tts/bin/piper.dart
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | +import 'dart:typed_data'; | ||
| 4 | + | ||
| 5 | +import 'package:args/args.dart'; | ||
| 6 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 7 | + | ||
| 8 | +import './init.dart'; | ||
| 9 | + | ||
/// Generates speech from text with a Piper VITS model and saves it as a wave
/// file, printing the size of every chunk handed to the generation callback.
///
/// Required options: `--model`, `--tokens`, `--data-dir`, `--text` and
/// `--output-wav`. Optional options: `--speed` (default `1.0`) and `--sid`
/// (default `0`). When a required option is missing, the usage text is
/// printed and the process exits with status 1.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('data-dir', help: 'Path to espeak-ng-data directory')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // Anything that is not a finite, positive number (unparsable, 0, negative,
  // NaN, infinity) falls back to 1.0 so that `1 / speed` below stays finite
  // and positive.
  final requestedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed =
      (requestedSpeed.isFinite && requestedSpeed > 0) ? requestedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generateWithCallback(
        text: text,
        sid: sid,
        speed: speed,
        callback: (Float32List samples) {
          print('${samples.length} samples received');
          // You can play samples in a separate thread/isolate
        });

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    // Release the TTS object even when generation or writing throws.
    tts.free();
  }
  print('Saved to $outputWav');
}
dart-api-examples/tts/bin/zh.dart
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | +import 'dart:typed_data'; | ||
| 4 | + | ||
| 5 | +import 'package:args/args.dart'; | ||
| 6 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 7 | + | ||
| 8 | +import './init.dart'; | ||
| 9 | + | ||
/// Generates speech from text with a lexicon-based Chinese VITS model and
/// saves it as a wave file.
///
/// Required options: `--model`, `--lexicon`, `--tokens`, `--text` and
/// `--output-wav`. Optional options: `--dict-dir`, `--rule-fsts` and
/// `--rule-fars` (all default to the empty string), `--speed` (default
/// `1.0`) and `--sid` (default `0`). When a required option is missing, the
/// usage text is printed and the process exits with status 1.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('lexicon', help: 'Path to lexicon.txt')
    ..addOption(
      'dict-dir',
      help: 'Path to jieba dict directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );
  final res = parser.parse(arguments);
  if (res['model'] == null ||
      res['lexicon'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }
  final model = res['model'] as String;
  final lexicon = res['lexicon'] as String;
  final tokens = res['tokens'] as String;
  final dictDir = res['dict-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // Anything that is not a finite, positive number (unparsable, 0, negative,
  // NaN, infinity) falls back to 1.0 so that `1 / speed` below stays finite
  // and positive.
  final requestedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed =
      (requestedSpeed.isFinite && requestedSpeed > 0) ? requestedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  try {
    final audio = tts.generate(text: text, sid: sid, speed: speed);

    sherpa_onnx.writeWave(
      filename: outputWav,
      samples: audio.samples,
      sampleRate: audio.sampleRate,
    );
  } finally {
    // Release the TTS object even when generation or writing throws.
    tts.free();
  }
  print('Saved to $outputWav');
}
dart-api-examples/tts/pubspec.lock
0 → 100644
| 1 | +# Generated by pub | ||
| 2 | +# See https://dart.dev/tools/pub/glossary#lockfile | ||
| 3 | +packages: | ||
| 4 | + args: | ||
| 5 | + dependency: "direct main" | ||
| 6 | + description: | ||
| 7 | + name: args | ||
| 8 | + sha256: "7cf60b9f0cc88203c5a190b4cd62a99feea42759a7fa695010eb5de1c0b2252a" | ||
| 9 | + url: "https://pub.dev" | ||
| 10 | + source: hosted | ||
| 11 | + version: "2.5.0" | ||
| 12 | + characters: | ||
| 13 | + dependency: transitive | ||
| 14 | + description: | ||
| 15 | + name: characters | ||
| 16 | + sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605" | ||
| 17 | + url: "https://pub.dev" | ||
| 18 | + source: hosted | ||
| 19 | + version: "1.3.0" | ||
| 20 | + collection: | ||
| 21 | + dependency: transitive | ||
| 22 | + description: | ||
| 23 | + name: collection | ||
| 24 | + sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a | ||
| 25 | + url: "https://pub.dev" | ||
| 26 | + source: hosted | ||
| 27 | + version: "1.18.0" | ||
| 28 | + ffi: | ||
| 29 | + dependency: transitive | ||
| 30 | + description: | ||
| 31 | + name: ffi | ||
| 32 | + sha256: "493f37e7df1804778ff3a53bd691d8692ddf69702cf4c1c1096a2e41b4779e21" | ||
| 33 | + url: "https://pub.dev" | ||
| 34 | + source: hosted | ||
| 35 | + version: "2.1.2" | ||
| 36 | + flutter: | ||
| 37 | + dependency: transitive | ||
| 38 | + description: flutter | ||
| 39 | + source: sdk | ||
| 40 | + version: "0.0.0" | ||
| 41 | + lints: | ||
| 42 | + dependency: "direct dev" | ||
| 43 | + description: | ||
| 44 | + name: lints | ||
| 45 | + sha256: cbf8d4b858bb0134ef3ef87841abdf8d63bfc255c266b7bf6b39daa1085c4290 | ||
| 46 | + url: "https://pub.dev" | ||
| 47 | + source: hosted | ||
| 48 | + version: "3.0.0" | ||
| 49 | + material_color_utilities: | ||
| 50 | + dependency: transitive | ||
| 51 | + description: | ||
| 52 | + name: material_color_utilities | ||
| 53 | + sha256: "0e0a020085b65b6083975e499759762399b4475f766c21668c4ecca34ea74e5a" | ||
| 54 | + url: "https://pub.dev" | ||
| 55 | + source: hosted | ||
| 56 | + version: "0.8.0" | ||
| 57 | + meta: | ||
| 58 | + dependency: transitive | ||
| 59 | + description: | ||
| 60 | + name: meta | ||
| 61 | + sha256: "7687075e408b093f36e6bbf6c91878cc0d4cd10f409506f7bc996f68220b9136" | ||
| 62 | + url: "https://pub.dev" | ||
| 63 | + source: hosted | ||
| 64 | + version: "1.12.0" | ||
| 65 | + path: | ||
| 66 | + dependency: "direct main" | ||
| 67 | + description: | ||
| 68 | + name: path | ||
| 69 | + sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af" | ||
| 70 | + url: "https://pub.dev" | ||
| 71 | + source: hosted | ||
| 72 | + version: "1.9.0" | ||
| 73 | + sherpa_onnx: | ||
| 74 | + dependency: "direct main" | ||
| 75 | + description: | ||
| 76 | + name: sherpa_onnx | ||
| 77 | + sha256: e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8 | ||
| 78 | + url: "https://pub.dev" | ||
| 79 | + source: hosted | ||
| 80 | + version: "1.9.29" | ||
| 81 | + sky_engine: | ||
| 82 | + dependency: transitive | ||
| 83 | + description: flutter | ||
| 84 | + source: sdk | ||
| 85 | + version: "0.0.99" | ||
| 86 | + vector_math: | ||
| 87 | + dependency: transitive | ||
| 88 | + description: | ||
| 89 | + name: vector_math | ||
| 90 | + sha256: "80b3257d1492ce4d091729e3a67a60407d227c27241d6927be0130c98e741803" | ||
| 91 | + url: "https://pub.dev" | ||
| 92 | + source: hosted | ||
| 93 | + version: "2.1.4" | ||
| 94 | +sdks: | ||
| 95 | + dart: ">=3.4.0 <4.0.0" | ||
| 96 | + flutter: ">=3.3.0" |
dart-api-examples/tts/pubspec.yaml
0 → 100644
| 1 | +name: tts | ||
| 2 | +description: A sample command-line application. | ||
| 3 | +version: 1.0.0 | ||
| 4 | +# repository: https://github.com/my_org/my_repo | ||
| 5 | + | ||
| 6 | +environment: | ||
| 7 | + sdk: ^3.4.0 | ||
| 8 | + | ||
| 9 | +# Add regular dependencies here. | ||
| 10 | +dependencies: | ||
| 11 | + sherpa_onnx: ^1.9.29 | ||
| 12 | + path: ^1.9.0 | ||
| 13 | + args: ^2.5.0 | ||
| 14 | + | ||
| 15 | +dev_dependencies: | ||
| 16 | + lints: ^3.0.0 |
dart-api-examples/tts/run-coqui.sh
0 → 100755
#!/usr/bin/env bash
#
# Runs the Coqui TTS example (./bin/coqui.dart): fetches Dart dependencies,
# downloads the vits-coqui-de-css10 model if it is not already present, then
# synthesizes one German sentence into coqui-0.wav.

set -ex

dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [[ ! -f ./vits-coqui-de-css10/tokens.txt ]]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page, so `set -e` stops the script here rather than at a confusing tar
  # failure.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  tar xvf vits-coqui-de-css10.tar.bz2
  rm vits-coqui-de-css10.tar.bz2
fi

# It is a character-based TTS model, so there is no need to use a lexicon
dart run \
  ./bin/coqui.dart \
  --model ./vits-coqui-de-css10/model.onnx \
  --tokens ./vits-coqui-de-css10/tokens.txt \
  --sid 0 \
  --speed 0.7 \
  --text 'Alles hat ein Ende, nur die Wurst hat zwei.' \
  --output-wav coqui-0.wav

ls -lh *.wav
dart-api-examples/tts/run-piper.sh
0 → 100755
#!/usr/bin/env bash
#
# Runs the Piper TTS example (./bin/piper.dart): fetches Dart dependencies,
# downloads the vits-piper-en_US-libritts_r-medium model if it is not already
# present, then synthesizes one English sentence into piper-351.wav.

set -ex

dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page, so `set -e` stops the script here rather than at a confusing tar
  # failure.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
  rm vits-piper-en_US-libritts_r-medium.tar.bz2
fi

# The spoken sentence describes this demo, which is text to speech.
dart run \
  ./bin/piper.dart \
  --model ./vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx \
  --tokens ./vits-piper-en_US-libritts_r-medium/tokens.txt \
  --data-dir ./vits-piper-en_US-libritts_r-medium/espeak-ng-data \
  --sid 351 \
  --speed 1.0 \
  --text 'How are you doing? This is a text to speech example, using next generation kaldi with piper.' \
  --output-wav piper-351.wav

ls -lh *.wav
dart-api-examples/tts/run-zh.sh
0 → 100755
#!/usr/bin/env bash
#
# Runs the Chinese TTS example (./bin/zh.dart) twice: once with the jieba
# dictionary only (zh-jieba-2.wav) and once with additional rule FSTs for
# phone numbers, dates and numbers (zh-jieba-3.wav). The
# sherpa-onnx-vits-zh-ll model is downloaded first if it is not present.

set -ex

dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the error
  # page, so `set -e` stops the script here rather than at a confusing tar
  # failure.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
  tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
  rm sherpa-onnx-vits-zh-ll.tar.bz2
fi

dart run \
  ./bin/zh.dart \
  --model ./sherpa-onnx-vits-zh-ll/model.onnx \
  --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
  --dict-dir ./sherpa-onnx-vits-zh-ll/dict \
  --sid 2 \
  --speed 1.0 \
  --text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \
  --output-wav zh-jieba-2.wav

dart run \
  ./bin/zh.dart \
  --model ./sherpa-onnx-vits-zh-ll/model.onnx \
  --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
  --dict-dir ./sherpa-onnx-vits-zh-ll/dict \
  --rule-fsts "./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst" \
  --sid 3 \
  --speed 1.0 \
  --text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \
  --output-wav zh-jieba-3.wav

ls -lh *.wav
scripts/dart/tts-pubspec.yaml
0 → 100644
| 1 | +name: tts | ||
| 2 | +description: A sample command-line application. | ||
| 3 | +version: 1.0.0 | ||
| 4 | +# repository: https://github.com/my_org/my_repo | ||
| 5 | + | ||
| 6 | +environment: | ||
| 7 | + sdk: ^3.4.0 | ||
| 8 | + | ||
| 9 | +# Add regular dependencies here. | ||
| 10 | +dependencies: | ||
| 11 | + sherpa_onnx: | ||
| 12 | + path: ../../sherpa-onnx/flutter | ||
| 13 | + path: ^1.9.0 | ||
| 14 | + args: ^2.5.0 | ||
| 15 | + | ||
| 16 | +dev_dependencies: | ||
| 17 | + lints: ^3.0.0 |
| @@ -8,6 +8,7 @@ export 'src/offline_stream.dart'; | @@ -8,6 +8,7 @@ export 'src/offline_stream.dart'; | ||
| 8 | export 'src/online_recognizer.dart'; | 8 | export 'src/online_recognizer.dart'; |
| 9 | export 'src/online_stream.dart'; | 9 | export 'src/online_stream.dart'; |
| 10 | export 'src/speaker_identification.dart'; | 10 | export 'src/speaker_identification.dart'; |
| 11 | +export 'src/tts.dart'; | ||
| 11 | export 'src/vad.dart'; | 12 | export 'src/vad.dart'; |
| 12 | export 'src/wave_reader.dart'; | 13 | export 'src/wave_reader.dart'; |
| 13 | export 'src/wave_writer.dart'; | 14 | export 'src/wave_writer.dart'; |
| @@ -2,6 +2,55 @@ | @@ -2,6 +2,55 @@ | ||
| 2 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | import 'package:ffi/ffi.dart'; | 3 | import 'package:ffi/ffi.dart'; |
| 4 | 4 | ||
/// FFI struct holding the VITS model settings passed to the native library.
///
/// The declaration order and native type annotations of the fields define
/// the struct's memory layout, so they must stay exactly as they are.
final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
  // String settings, each passed as a pointer to UTF-8 data.
  external Pointer<Utf8> model;
  external Pointer<Utf8> lexicon;
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> dataDir;

  // Scale factors, stored as native `float` values.
  @Float()
  external double noiseScale;
  @Float()
  external double noiseScaleW;
  @Float()
  external double lengthScale;

  // Declared after the float fields; keep it last.
  external Pointer<Utf8> dictDir;
}
| 22 | + | ||
/// FFI struct for the TTS model settings: the embedded VITS configuration
/// plus thread count, debug flag and provider string.
///
/// Field order and native type annotations define the memory layout and must
/// not be changed.
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
  // Nested by value, not by pointer.
  external SherpaOnnxOfflineTtsVitsModelConfig vits;

  @Int32()
  external int numThreads;
  // Stored as a 32-bit integer rather than a Dart bool.
  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}
| 33 | + | ||
/// Top-level FFI configuration struct whose pointer is the argument of
/// [SherpaOnnxCreateOfflineTtsNative].
///
/// Field order and native type annotations define the memory layout and must
/// not be changed.
final class SherpaOnnxOfflineTtsConfig extends Struct {
  // Nested by value, not by pointer.
  external SherpaOnnxOfflineTtsModelConfig model;
  external Pointer<Utf8> ruleFsts;
  // The spelling of this field name is part of the existing API; keep it.
  @Int32()
  external int maxNumSenetences;
  external Pointer<Utf8> ruleFars;
}
| 43 | + | ||
/// FFI struct describing audio returned by the native generate functions.
///
/// Field order and native type annotations define the memory layout and must
/// not be changed.
final class SherpaOnnxGeneratedAudio extends Struct {
  // Pointer to native `float` sample data.
  external Pointer<Float> samples;
  // NOTE(review): presumably the number of floats behind [samples]; confirm
  // against the native header.
  @Int32()
  external int n;
  @Int32()
  external int sampleRate;
}
| 53 | + | ||
| 5 | final class SherpaOnnxFeatureConfig extends Struct { | 54 | final class SherpaOnnxFeatureConfig extends Struct { |
| 6 | @Int32() | 55 | @Int32() |
| 7 | external int sampleRate; | 56 | external int sampleRate; |
| @@ -218,6 +267,8 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { | @@ -218,6 +267,8 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { | ||
| 218 | external Pointer<Utf8> provider; | 267 | external Pointer<Utf8> provider; |
| 219 | } | 268 | } |
| 220 | 269 | ||
| 270 | +final class SherpaOnnxOfflineTts extends Opaque {} | ||
| 271 | + | ||
| 221 | final class SherpaOnnxCircularBuffer extends Opaque {} | 272 | final class SherpaOnnxCircularBuffer extends Opaque {} |
| 222 | 273 | ||
| 223 | final class SherpaOnnxVoiceActivityDetector extends Opaque {} | 274 | final class SherpaOnnxVoiceActivityDetector extends Opaque {} |
| @@ -234,6 +285,60 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | @@ -234,6 +285,60 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | ||
| 234 | 285 | ||
| 235 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} | 286 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} |
| 236 | 287 | ||
// Function-type pairs for the offline TTS C symbols. Each `...Native` typedef
// gives the C-side signature used for the symbol lookup; the typedef without
// the suffix is the Dart-side signature the lookup is converted to.

/// `SherpaOnnxCreateOfflineTts`: takes a config pointer and returns a TTS
/// handle.
typedef SherpaOnnxCreateOfflineTtsNative = Pointer<SherpaOnnxOfflineTts>
    Function(Pointer<SherpaOnnxOfflineTtsConfig>);

// Only pointers are involved, so the Dart signature equals the native one.
typedef SherpaOnnxCreateOfflineTts = Pointer<SherpaOnnxOfflineTts> Function(
    Pointer<SherpaOnnxOfflineTtsConfig>);

/// `SherpaOnnxDestroyOfflineTts`: takes a TTS handle and returns nothing.
typedef SherpaOnnxDestroyOfflineTtsNative = Void Function(
    Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxDestroyOfflineTts = void Function(
    Pointer<SherpaOnnxOfflineTts>);

/// `SherpaOnnxOfflineTtsSampleRate`: takes a TTS handle and returns an int32.
typedef SherpaOnnxOfflineTtsSampleRateNative = Int32 Function(
    Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsSampleRate = int Function(
    Pointer<SherpaOnnxOfflineTts>);

/// `SherpaOnnxOfflineTtsNumSpeakers`: takes a TTS handle and returns an
/// int32.
typedef SherpaOnnxOfflineTtsNumSpeakersNative = Int32 Function(
    Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsNumSpeakers = int Function(
    Pointer<SherpaOnnxOfflineTts>);

/// `SherpaOnnxOfflineTtsGenerate`: takes a TTS handle, a UTF-8 string, an
/// int32 and a float, and returns a pointer to the generated audio struct.
// NOTE(review): the int32 and float are presumably the speaker id and the
// speed; confirm against the native header.
typedef SherpaOnnxOfflineTtsGenerateNative = Pointer<SherpaOnnxGeneratedAudio>
    Function(Pointer<SherpaOnnxOfflineTts>, Pointer<Utf8>, Int32, Float);

typedef SherpaOnnxOfflineTtsGenerate = Pointer<SherpaOnnxGeneratedAudio>
    Function(Pointer<SherpaOnnxOfflineTts>, Pointer<Utf8>, int, double);

/// `SherpaOnnxDestroyOfflineTtsGeneratedAudio`: takes a pointer to a
/// generated audio struct and returns nothing.
typedef SherpaOnnxDestroyOfflineTtsGeneratedAudioNative = Void Function(
    Pointer<SherpaOnnxGeneratedAudio>);

typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio = void Function(
    Pointer<SherpaOnnxGeneratedAudio>);

/// C signature of the callback accepted by the generate-with-callback
/// function: a pointer to float data plus an int32.
// NOTE(review): the int32 is presumably the number of samples; confirm
// against the native header.
typedef SherpaOnnxGeneratedAudioCallbackNative = Void Function(
    Pointer<Float>, Int32);

/// `SherpaOnnxOfflineTtsGenerateWithCallback`: same leading arguments as
/// [SherpaOnnxOfflineTtsGenerateNative], plus a native callback pointer.
typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative
    = Pointer<SherpaOnnxGeneratedAudio> Function(
        Pointer<SherpaOnnxOfflineTts>,
        Pointer<Utf8>,
        Int32,
        Float,
        Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>);

typedef SherpaOnnxOfflineTtsGenerateWithCallback
    = Pointer<SherpaOnnxGeneratedAudio> Function(
        Pointer<SherpaOnnxOfflineTts>,
        Pointer<Utf8>,
        int,
        double,
        Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>);
| 341 | + | ||
| 237 | typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer> | 342 | typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer> |
| 238 | Function(Pointer<SherpaOnnxOfflineRecognizerConfig>); | 343 | Function(Pointer<SherpaOnnxOfflineRecognizerConfig>); |
| 239 | 344 | ||
| @@ -608,6 +713,16 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | @@ -608,6 +713,16 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | ||
| 608 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); | 713 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); |
| 609 | 714 | ||
| 610 | class SherpaOnnxBindings { | 715 | class SherpaOnnxBindings { |
| 716 | + static SherpaOnnxCreateOfflineTts? createOfflineTts; | ||
| 717 | + static SherpaOnnxDestroyOfflineTts? destroyOfflineTts; | ||
| 718 | + static SherpaOnnxOfflineTtsSampleRate? offlineTtsSampleRate; | ||
| 719 | + static SherpaOnnxOfflineTtsNumSpeakers? offlineTtsNumSpeakers; | ||
| 720 | + static SherpaOnnxOfflineTtsGenerate? offlineTtsGenerate; | ||
| 721 | + static SherpaOnnxDestroyOfflineTtsGeneratedAudio? | ||
| 722 | + destroyOfflineTtsGeneratedAudio; | ||
| 723 | + static SherpaOnnxOfflineTtsGenerateWithCallback? | ||
| 724 | + offlineTtsGenerateWithCallback; | ||
| 725 | + | ||
| 611 | static CreateOfflineRecognizer? createOfflineRecognizer; | 726 | static CreateOfflineRecognizer? createOfflineRecognizer; |
| 612 | static DestroyOfflineRecognizer? destroyOfflineRecognizer; | 727 | static DestroyOfflineRecognizer? destroyOfflineRecognizer; |
| 613 | static CreateOfflineStream? createOfflineStream; | 728 | static CreateOfflineStream? createOfflineStream; |
| @@ -740,6 +855,43 @@ class SherpaOnnxBindings { | @@ -740,6 +855,43 @@ class SherpaOnnxBindings { | ||
| 740 | static SherpaOnnxFreeWave? freeWave; | 855 | static SherpaOnnxFreeWave? freeWave; |
| 741 | 856 | ||
| 742 | static void init(DynamicLibrary dynamicLibrary) { | 857 | static void init(DynamicLibrary dynamicLibrary) { |
| 858 | + createOfflineTts ??= dynamicLibrary | ||
| 859 | + .lookup<NativeFunction<SherpaOnnxCreateOfflineTtsNative>>( | ||
| 860 | + 'SherpaOnnxCreateOfflineTts') | ||
| 861 | + .asFunction(); | ||
| 862 | + | ||
| 863 | + destroyOfflineTts ??= dynamicLibrary | ||
| 864 | + .lookup<NativeFunction<SherpaOnnxDestroyOfflineTtsNative>>( | ||
| 865 | + 'SherpaOnnxDestroyOfflineTts') | ||
| 866 | + .asFunction(); | ||
| 867 | + | ||
| 868 | + offlineTtsSampleRate ??= dynamicLibrary | ||
| 869 | + .lookup<NativeFunction<SherpaOnnxOfflineTtsSampleRateNative>>( | ||
| 870 | + 'SherpaOnnxOfflineTtsSampleRate') | ||
| 871 | + .asFunction(); | ||
| 872 | + | ||
| 873 | + offlineTtsNumSpeakers ??= dynamicLibrary | ||
| 874 | + .lookup<NativeFunction<SherpaOnnxOfflineTtsNumSpeakersNative>>( | ||
| 875 | + 'SherpaOnnxOfflineTtsNumSpeakers') | ||
| 876 | + .asFunction(); | ||
| 877 | + | ||
| 878 | + offlineTtsGenerate ??= dynamicLibrary | ||
| 879 | + .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateNative>>( | ||
| 880 | + 'SherpaOnnxOfflineTtsGenerate') | ||
| 881 | + .asFunction(); | ||
| 882 | + | ||
| 883 | + destroyOfflineTtsGeneratedAudio ??= dynamicLibrary | ||
| 884 | + .lookup< | ||
| 885 | + NativeFunction< | ||
| 886 | + SherpaOnnxDestroyOfflineTtsGeneratedAudioNative>>( | ||
| 887 | + 'SherpaOnnxDestroyOfflineTtsGeneratedAudio') | ||
| 888 | + .asFunction(); | ||
| 889 | + | ||
| 890 | + offlineTtsGenerateWithCallback ??= dynamicLibrary | ||
| 891 | + .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateWithCallbackNative>>( | ||
| 892 | + 'SherpaOnnxOfflineTtsGenerateWithCallback') | ||
| 893 | + .asFunction(); | ||
| 894 | + | ||
| 743 | createOfflineRecognizer ??= dynamicLibrary | 895 | createOfflineRecognizer ??= dynamicLibrary |
| 744 | .lookup<NativeFunction<CreateOfflineRecognizerNative>>( | 896 | .lookup<NativeFunction<CreateOfflineRecognizerNative>>( |
| 745 | 'CreateOfflineRecognizer') | 897 | 'CreateOfflineRecognizer') |
sherpa-onnx/flutter/lib/src/tts.dart
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:convert'; | ||
| 3 | +import 'dart:ffi'; | ||
| 4 | +import 'dart:typed_data'; | ||
| 5 | + | ||
| 6 | +import 'package:ffi/ffi.dart'; | ||
| 7 | + | ||
| 8 | +import './sherpa_onnx_bindings.dart'; | ||
| 9 | + | ||
/// Immutable settings for a VITS text-to-speech model.
///
/// The string fields are handed to the native library as UTF-8 strings and
/// the scale fields as floating point numbers when an engine is created.
class OfflineTtsVitsModelConfig {
  /// The model setting (presumably a path to the model file; confirm
  /// against the native library documentation).
  final String model;

  /// The lexicon setting. Defaults to the empty string.
  final String lexicon;

  /// The tokens setting (presumably a path to the tokens file; confirm
  /// against the native library documentation).
  final String tokens;

  /// The data directory setting. Defaults to the empty string.
  final String dataDir;

  /// The noise scale forwarded to the native model. Defaults to 0.667.
  final double noiseScale;

  /// The second noise scale forwarded to the native model. Defaults to 0.8.
  final double noiseScaleW;

  /// The length scale forwarded to the native model. Defaults to 1.0.
  final double lengthScale;

  /// The dictionary directory setting. Defaults to the empty string.
  final String dictDir;

  /// Creates a VITS model configuration.
  ///
  /// Only [model] and [tokens] are required; every other value has a
  /// default.
  const OfflineTtsVitsModelConfig({
    required this.model,
    this.lexicon = '',
    required this.tokens,
    this.dataDir = '',
    this.noiseScale = 0.667,
    this.noiseScaleW = 0.8,
    this.lengthScale = 1.0,
    this.dictDir = '',
  });

  /// Returns a single-line, human-readable dump of every field.
  @override
  String toString() => 'OfflineTtsVitsModelConfig('
      'model: $model, '
      'lexicon: $lexicon, '
      'tokens: $tokens, '
      'dataDir: $dataDir, '
      'noiseScale: $noiseScale, '
      'noiseScaleW: $noiseScaleW, '
      'lengthScale: $lengthScale, '
      'dictDir: $dictDir)';
}
| 36 | + | ||
/// Immutable model-level settings for the offline TTS engine.
///
/// Bundles the VITS model settings with runtime options that are forwarded
/// to the native library when an engine is created.
class OfflineTtsModelConfig {
  /// The VITS model settings.
  final OfflineTtsVitsModelConfig vits;

  /// The thread count forwarded to the native library. Defaults to 1.
  final int numThreads;

  /// Whether the native debug flag is set (sent as 1 for true and 0 for
  /// false). Defaults to true.
  final bool debug;

  /// The provider name forwarded to the native library. Defaults to 'cpu'.
  final String provider;

  /// Creates a model configuration; only [vits] is required.
  const OfflineTtsModelConfig({
    required this.vits,
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  /// Returns a single-line, human-readable dump of every field.
  @override
  String toString() => 'OfflineTtsModelConfig('
      'vits: $vits, '
      'numThreads: $numThreads, '
      'debug: $debug, '
      'provider: $provider)';
}
| 55 | + | ||
/// Immutable top-level configuration for the offline TTS engine.
class OfflineTtsConfig {
  /// The model-level settings.
  final OfflineTtsModelConfig model;

  /// The rule FSTs setting, forwarded to the native library as a string.
  /// Defaults to the empty string.
  final String ruleFsts;

  /// The value written to the native `maxNumSenetences` field. Defaults
  /// to 1.
  ///
  /// The spelling matches the native struct field and is part of the public
  /// API, so it is kept as is.
  final int maxNumSenetences;

  /// The rule FARs setting, forwarded to the native library as a string.
  /// Defaults to the empty string.
  final String ruleFars;

  /// Creates a TTS configuration; only [model] is required.
  const OfflineTtsConfig({
    required this.model,
    this.ruleFsts = '',
    this.maxNumSenetences = 1,
    this.ruleFars = '',
  });

  /// Returns a single-line, human-readable dump of every field.
  @override
  String toString() => 'OfflineTtsConfig('
      'model: $model, '
      'ruleFsts: $ruleFsts, '
      'maxNumSenetences: $maxNumSenetences, '
      'ruleFars: $ruleFars)';
}
| 74 | + | ||
/// Audio produced by a text-to-speech call.
///
/// Holds the samples as a Dart-owned [Float32List] together with the sample
/// rate reported for them.
class GeneratedAudio {
  /// The audio samples.
  final Float32List samples;

  /// The sample rate that goes with [samples].
  final int sampleRate;

  /// Wraps [samples] and [sampleRate] into a result object.
  GeneratedAudio({
    required this.samples,
    required this.sampleRate,
  });
}
| 84 | + | ||
/// Offline text-to-speech engine that wraps a native handle obtained through
/// [SherpaOnnxBindings].
///
/// An instance owns the native object referenced by [ptr]; call [free] once
/// it is no longer needed. After [free] (or if creation failed) [ptr] is
/// `nullptr`; in that state [generate] and [generateWithCallback] return
/// empty audio, and [sampleRate] and [numSpeakers] return 0.
class OfflineTts {
  OfflineTts._({required this.ptr, required this.config});

  /// Creates a native TTS engine from [config].
  ///
  /// The user is responsible to call the [free] method of the returned
  /// instance to avoid memory leak.
  ///
  /// If the native binding is unavailable, the returned instance has a
  /// `nullptr` [ptr].
  factory OfflineTts(OfflineTtsConfig config) {
    final c = calloc<SherpaOnnxOfflineTtsConfig>();

    final vits = config.model.vits;
    c.ref.model.vits.model = vits.model.toNativeUtf8();
    c.ref.model.vits.lexicon = vits.lexicon.toNativeUtf8();
    c.ref.model.vits.tokens = vits.tokens.toNativeUtf8();
    c.ref.model.vits.dataDir = vits.dataDir.toNativeUtf8();
    c.ref.model.vits.noiseScale = vits.noiseScale;
    c.ref.model.vits.noiseScaleW = vits.noiseScaleW;
    c.ref.model.vits.lengthScale = vits.lengthScale;
    c.ref.model.vits.dictDir = vits.dictDir.toNativeUtf8();

    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.provider = config.model.provider.toNativeUtf8();

    c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
    c.ref.maxNumSenetences = config.maxNumSenetences;
    c.ref.ruleFars = config.ruleFars.toNativeUtf8();

    try {
      final ptr = SherpaOnnxBindings.createOfflineTts?.call(c) ?? nullptr;
      return OfflineTts._(ptr: ptr, config: config);
    } finally {
      // The strings were already released right after the create call, so
      // the native side is presumed not to retain the config memory; the
      // struct itself is released on the same assumption.
      calloc.free(c.ref.ruleFars);
      calloc.free(c.ref.ruleFsts);
      calloc.free(c.ref.model.provider);
      calloc.free(c.ref.model.vits.dictDir);
      calloc.free(c.ref.model.vits.dataDir);
      calloc.free(c.ref.model.vits.tokens);
      calloc.free(c.ref.model.vits.lexicon);
      calloc.free(c.ref.model.vits.model);

      // Previously leaked: the config struct itself must be released too.
      calloc.free(c);
    }
  }

  /// Destroys the native engine and resets [ptr] to `nullptr`.
  ///
  /// Calling this more than once is harmless.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.destroyOfflineTts?.call(ptr);
    ptr = nullptr;
  }

  /// Synthesizes [text] and returns the resulting audio.
  ///
  /// [sid] and [speed] are forwarded unchanged to the native generate call.
  ///
  /// Returns empty samples with a sample rate of 0 when the engine has no
  /// native handle, the binding is unavailable, or the native call returns
  /// a null result.
  GeneratedAudio generate(
      {required String text, int sid = 0, double speed = 1.0}) {
    if (ptr == nullptr) {
      return _emptyAudio();
    }

    final textPtr = text.toNativeUtf8();
    try {
      final p = SherpaOnnxBindings.offlineTtsGenerate
              ?.call(ptr, textPtr, sid, speed) ??
          nullptr;

      if (p == nullptr) {
        return _emptyAudio();
      }

      try {
        // Copy out of native memory before the native result is destroyed.
        final samples =
            Float32List.fromList(p.ref.samples.asTypedList(p.ref.n));
        return GeneratedAudio(samples: samples, sampleRate: p.ref.sampleRate);
      } finally {
        SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);
      }
    } finally {
      calloc.free(textPtr);
    }
  }

  /// Synthesizes [text] like [generate], additionally invoking [callback]
  /// with a Dart-owned copy of each chunk of samples that the native side
  /// reports while generating.
  ///
  /// [sid] and [speed] are forwarded unchanged to the native generate call.
  ///
  /// Returns empty samples with a sample rate of 0 when the engine has no
  /// native handle, the binding is unavailable, or the native call returns
  /// a null result.
  GeneratedAudio generateWithCallback(
      {required String text,
      int sid = 0,
      double speed = 1.0,
      required void Function(Float32List samples) callback}) {
    if (ptr == nullptr) {
      return _emptyAudio();
    }

    // see
    // https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
    // https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
    // https://github.com/dart-lang/sdk/blob/main/tests/ffi/isolate_local_function_callbacks_test.dart#L46
    final wrapper =
        NativeCallable<SherpaOnnxGeneratedAudioCallbackNative>.isolateLocal(
            (Pointer<Float> samples, int n) {
      // Hand the callback its own copy rather than a view of native memory.
      callback(Float32List.fromList(samples.asTypedList(n)));
    });

    final textPtr = text.toNativeUtf8();
    try {
      final p = SherpaOnnxBindings.offlineTtsGenerateWithCallback
              ?.call(ptr, textPtr, sid, speed, wrapper.nativeFunction) ??
          nullptr;

      if (p == nullptr) {
        return _emptyAudio();
      }

      try {
        // Copy out of native memory before the native result is destroyed.
        final samples =
            Float32List.fromList(p.ref.samples.asTypedList(p.ref.n));
        return GeneratedAudio(samples: samples, sampleRate: p.ref.sampleRate);
      } finally {
        SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);
      }
    } finally {
      // Always release the text buffer and the native callable, even if
      // something above throws.
      calloc.free(textPtr);
      wrapper.close();
    }
  }

  /// The sample rate reported by the native engine, or 0 when the engine
  /// has no native handle or the binding is unavailable.
  int get sampleRate => ptr == nullptr
      ? 0
      : SherpaOnnxBindings.offlineTtsSampleRate?.call(ptr) ?? 0;

  /// The number of speakers reported by the native engine, or 0 when the
  /// engine has no native handle or the binding is unavailable.
  int get numSpeakers => ptr == nullptr
      ? 0
      : SherpaOnnxBindings.offlineTtsNumSpeakers?.call(ptr) ?? 0;

  /// The result returned when no audio could be produced.
  static GeneratedAudio _emptyAudio() =>
      GeneratedAudio(samples: Float32List(0), sampleRate: 0);

  /// Handle to the native engine; `nullptr` after [free] or when creation
  /// failed.
  Pointer<SherpaOnnxOfflineTts> ptr;

  /// The configuration this engine was created with.
  OfflineTtsConfig config;
}
-
请 注册 或 登录 后发表评论