Add TTS API and examples for Dart (#1010)

Fangjun Kuang · GitHub
Commit e52d32b95b0258b5e603fa61816080507413a500 e52d32b9 1 parent e3077670
.github/scripts/test-dart.sh
.github/workflows/test-dart.yaml
dart-api-examples/streaming-asr/README.md
dart-api-examples/tts/.gitignore
dart-api-examples/tts/CHANGELOG.md
dart-api-examples/tts/README.md
dart-api-examples/tts/analysis_options.yaml
dart-api-examples/tts/bin/coqui.dart
dart-api-examples/tts/bin/init.dart
dart-api-examples/tts/bin/piper.dart
dart-api-examples/tts/bin/zh.dart
dart-api-examples/tts/pubspec.lock
dart-api-examples/tts/pubspec.yaml
dart-api-examples/tts/run-coqui.sh
dart-api-examples/tts/run-piper.sh
dart-api-examples/tts/run-zh.sh
scripts/dart/tts-pubspec.yaml
sherpa-onnx/flutter/lib/sherpa_onnx.dart
sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart
sherpa-onnx/flutter/lib/src/tts.dart
--- a/.github/scripts/test-dart.sh
查看文件 @e52d32b
+++ b/.github/scripts/test-dart.sh
查看文件 @e52d32b
@@ -4,6 +4,22 @@ set -ex
 
 cd dart-api-examples
 
+ pushd tts
+ 
+ echo '----------piper tts----------'
+ ./run-piper.sh
+ rm -rf vits-piper-*
+ 
+ echo '----------coqui tts----------'
+ ./run-coqui.sh
+ rm -rf vits-coqui-*
+ 
+ echo '----------zh tts----------'
+ ./run-zh.sh
+ rm -rf sherpa-onnx-*
+ 
+ popd # tts
+ 
 pushd streaming-asr
 
 echo '----------streaming zipformer ctc HLG----------'
--- a/.github/workflows/test-dart.yaml
查看文件 @e52d32b
+++ b/.github/workflows/test-dart.yaml
查看文件 @e52d32b
@@ -92,5 +92,6 @@ jobs:
           cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
           cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
           cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
+           cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml
 
           .github/scripts/test-dart.sh
--- a/dart-api-examples/streaming-asr/README.md
查看文件 @e52d32b
+++ b/dart-api-examples/streaming-asr/README.md
查看文件 @e52d32b
 # Introduction
 
 This folder contains examples for streaming ASR with Dart API.
+ 
 | File | Description|
 |------|------------|
 |[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
--- a/dart-api-examples/tts/.gitignore 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/.gitignore 0 → 100644
查看文件 @e52d32b
+ # https://dart.dev/guides/libraries/private-files
+ # Created by `dart pub`
+ .dart_tool/
--- a/dart-api-examples/tts/CHANGELOG.md 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/CHANGELOG.md 0 → 100644
查看文件 @e52d32b
+ ## 1.0.0
+ 
+ - Initial version.
--- a/dart-api-examples/tts/README.md 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/README.md 0 → 100644
查看文件 @e52d32b
+ # Introduction
+ 
+ This folder contains examples for text to speech with Dart API.
+ 
+ | File | Description|
+ |------|------------|
+ |[./bin/piper.dart](./bin/piper.dart)| Use a Piper TTS model for text to speech. See [./run-piper.sh](./run-piper.sh)|
+ |[./bin/coqui.dart](./bin/coqui.dart)| Use a Coqui TTS model for text to speech. See [./run-coqui.sh](./run-coqui.sh)|
+ |[./bin/zh.dart](./bin/zh.dart)| Use a Chinese VITS TTS model for text to speech. See [./run-zh.sh](./run-zh.sh)|
+ 
--- a/dart-api-examples/tts/analysis_options.yaml 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/analysis_options.yaml 0 → 100644
查看文件 @e52d32b
+ # This file configures the static analysis results for your project (errors,
+ # warnings, and lints).
+ #
+ # This enables the 'recommended' set of lints from `package:lints`.
+ # This set helps identify many issues that may lead to problems when running
+ # or consuming Dart code, and enforces writing Dart using a single, idiomatic
+ # style and format.
+ #
+ # If you want a smaller set of lints you can change this to specify
+ # 'package:lints/core.yaml'. These are just the most critical lints
+ # (the recommended set includes the core lints).
+ # The core lints are also what is used by pub.dev for scoring packages.
+ 
+ include: package:lints/recommended.yaml
+ 
+ # Uncomment the following section to specify additional rules.
+ 
+ # linter:
+ #   rules:
+ #     - camel_case_types
+ 
+ # analyzer:
+ #   exclude:
+ #     - path/to/excluded/files/**
+ 
+ # For more information about the core and recommended set of lints, see
+ # https://dart.dev/go/core-lints
+ 
+ # For additional information about configuring this file, see
+ # https://dart.dev/guides/language/analysis-options
--- a/dart-api-examples/tts/bin/coqui.dart 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/bin/coqui.dart 0 → 100644
查看文件 @e52d32b
+ // Copyright (c)  2024  Xiaomi Corporation
+ import 'dart:io';
+ import 'dart:typed_data';
+ 
+ import 'package:args/args.dart';
+ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+ 
+ import './init.dart';
+ 
+ /// Generates speech from text with a Coqui VITS model.
+ ///
+ /// Reads `--model`, `--tokens`, `--text`, `--output-wav`, `--speed` and
+ /// `--sid` from [arguments], synthesizes the text and writes the audio to
+ /// the file given by `--output-wav`. Prints the usage and exits with
+ /// status 1 when one of the required options is missing.
+ void main(List<String> arguments) async {
+   await initSherpaOnnx();
+ 
+   final parser = ArgParser()
+     ..addOption('model', help: 'Path to the ONNX model')
+     ..addOption('tokens', help: 'Path to tokens.txt')
+     ..addOption('text', help: 'Text to generate TTS for')
+     ..addOption('output-wav', help: 'Filename to save the generated audio')
+     ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+     ..addOption(
+       'sid',
+       help: 'Speaker ID to select. Used only for multi-speaker TTS',
+       defaultsTo: '0',
+     );
+   final res = parser.parse(arguments);
+   if (res['model'] == null ||
+       res['tokens'] == null ||
+       res['output-wav'] == null ||
+       res['text'] == null) {
+     print(parser.usage);
+     exit(1);
+   }
+   final model = res['model'] as String;
+   final tokens = res['tokens'] as String;
+   final text = res['text'] as String;
+   final outputWav = res['output-wav'] as String;
+   var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+   final sid = int.tryParse(res['sid'] as String) ?? 0;
+ 
+   // 1 / speed is used as the length scale below, so zero, negative, NaN and
+   // infinite values must not get through; fall back to the default speed.
+   if (!speed.isFinite || speed <= 0) {
+     speed = 1.0;
+   }
+ 
+   final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
+     model: model,
+     tokens: tokens,
+     lengthScale: 1 / speed,
+   );
+ 
+   final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+     vits: vits,
+     numThreads: 1,
+     debug: true,
+   );
+   final config = sherpa_onnx.OfflineTtsConfig(
+     model: modelConfig,
+     maxNumSenetences: 1,
+   );
+ 
+   final tts = sherpa_onnx.OfflineTts(config);
+   try {
+     final audio = tts.generate(text: text, sid: sid, speed: speed);
+ 
+     sherpa_onnx.writeWave(
+       filename: outputWav,
+       samples: audio.samples,
+       sampleRate: audio.sampleRate,
+     );
+     print('Saved to $outputWav');
+   } finally {
+     // Release the native engine even if generating or saving fails.
+     tts.free();
+   }
+ }
--- a/dart-api-examples/tts/bin/init.dart 0 → 120000
查看文件 @e52d32b
+++ b/dart-api-examples/tts/bin/init.dart 0 → 120000
查看文件 @e52d32b
+ ../../vad/bin/init.dart
\ No newline at end of file
--- a/dart-api-examples/tts/bin/piper.dart 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/bin/piper.dart 0 → 100644
查看文件 @e52d32b
+ // Copyright (c)  2024  Xiaomi Corporation
+ import 'dart:io';
+ import 'dart:typed_data';
+ 
+ import 'package:args/args.dart';
+ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+ 
+ import './init.dart';
+ 
+ /// Generates speech from text with a Piper VITS model.
+ ///
+ /// Reads `--model`, `--tokens`, `--data-dir`, `--text`, `--output-wav`,
+ /// `--speed` and `--sid` from [arguments], synthesizes the text while
+ /// reporting every chunk of samples delivered through the callback, and
+ /// writes the complete audio to the file given by `--output-wav`. Prints
+ /// the usage and exits with status 1 when a required option is missing.
+ void main(List<String> arguments) async {
+   await initSherpaOnnx();
+ 
+   final parser = ArgParser()
+     ..addOption('model', help: 'Path to the ONNX model')
+     ..addOption('tokens', help: 'Path to tokens.txt')
+     ..addOption('data-dir', help: 'Path to espeak-ng-data directory')
+     ..addOption('text', help: 'Text to generate TTS for')
+     ..addOption('output-wav', help: 'Filename to save the generated audio')
+     ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+     ..addOption(
+       'sid',
+       help: 'Speaker ID to select. Used only for multi-speaker TTS',
+       defaultsTo: '0',
+     );
+   final res = parser.parse(arguments);
+   if (res['model'] == null ||
+       res['tokens'] == null ||
+       res['data-dir'] == null ||
+       res['output-wav'] == null ||
+       res['text'] == null) {
+     print(parser.usage);
+     exit(1);
+   }
+   final model = res['model'] as String;
+   final tokens = res['tokens'] as String;
+   final dataDir = res['data-dir'] as String;
+   final text = res['text'] as String;
+   final outputWav = res['output-wav'] as String;
+   var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+   final sid = int.tryParse(res['sid'] as String) ?? 0;
+ 
+   // 1 / speed is used as the length scale below, so zero, negative, NaN and
+   // infinite values must not get through; fall back to the default speed.
+   if (!speed.isFinite || speed <= 0) {
+     speed = 1.0;
+   }
+ 
+   final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
+     model: model,
+     tokens: tokens,
+     dataDir: dataDir,
+     lengthScale: 1 / speed,
+   );
+ 
+   final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+     vits: vits,
+     numThreads: 1,
+     debug: true,
+   );
+   final config = sherpa_onnx.OfflineTtsConfig(
+     model: modelConfig,
+     maxNumSenetences: 1,
+   );
+ 
+   final tts = sherpa_onnx.OfflineTts(config);
+   try {
+     final audio = tts.generateWithCallback(
+         text: text,
+         sid: sid,
+         speed: speed,
+         callback: (Float32List samples) {
+           print('${samples.length} samples received');
+           // You can play samples in a separate thread/isolate
+         });
+ 
+     sherpa_onnx.writeWave(
+       filename: outputWav,
+       samples: audio.samples,
+       sampleRate: audio.sampleRate,
+     );
+     print('Saved to $outputWav');
+   } finally {
+     // Release the native engine even if generating or saving fails.
+     tts.free();
+   }
+ }
--- a/dart-api-examples/tts/bin/zh.dart 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/bin/zh.dart 0 → 100644
查看文件 @e52d32b
+ // Copyright (c)  2024  Xiaomi Corporation
+ import 'dart:io';
+ import 'dart:typed_data';
+ 
+ import 'package:args/args.dart';
+ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+ 
+ import './init.dart';
+ 
+ /// Generates speech from text with a lexicon-based (Chinese) VITS model.
+ ///
+ /// Reads `--model`, `--lexicon`, `--tokens`, `--dict-dir`, `--rule-fsts`,
+ /// `--rule-fars`, `--text`, `--output-wav`, `--speed` and `--sid` from
+ /// [arguments], synthesizes the text and writes the audio to the file given
+ /// by `--output-wav`. Prints the usage and exits with status 1 when a
+ /// required option is missing.
+ void main(List<String> arguments) async {
+   await initSherpaOnnx();
+ 
+   final parser = ArgParser()
+     ..addOption('model', help: 'Path to the ONNX model')
+     ..addOption('tokens', help: 'Path to tokens.txt')
+     ..addOption('lexicon', help: 'Path to lexicon.txt')
+     ..addOption(
+       'dict-dir',
+       help: 'Path to jieba dict directory',
+       defaultsTo: '',
+     )
+     ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
+     ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
+     ..addOption('text', help: 'Text to generate TTS for')
+     ..addOption('output-wav', help: 'Filename to save the generated audio')
+     ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
+     ..addOption(
+       'sid',
+       help: 'Speaker ID to select. Used only for multi-speaker TTS',
+       defaultsTo: '0',
+     );
+   final res = parser.parse(arguments);
+   if (res['model'] == null ||
+       res['lexicon'] == null ||
+       res['tokens'] == null ||
+       res['output-wav'] == null ||
+       res['text'] == null) {
+     print(parser.usage);
+     exit(1);
+   }
+   final model = res['model'] as String;
+   final lexicon = res['lexicon'] as String;
+   final tokens = res['tokens'] as String;
+   final dictDir = res['dict-dir'] as String;
+   final ruleFsts = res['rule-fsts'] as String;
+   final ruleFars = res['rule-fars'] as String;
+   final text = res['text'] as String;
+   final outputWav = res['output-wav'] as String;
+   var speed = double.tryParse(res['speed'] as String) ?? 1.0;
+   final sid = int.tryParse(res['sid'] as String) ?? 0;
+ 
+   // 1 / speed is used as the length scale below, so zero, negative, NaN and
+   // infinite values must not get through; fall back to the default speed.
+   if (!speed.isFinite || speed <= 0) {
+     speed = 1.0;
+   }
+ 
+   final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
+     model: model,
+     lexicon: lexicon,
+     tokens: tokens,
+     dictDir: dictDir,
+     lengthScale: 1 / speed,
+   );
+ 
+   final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
+     vits: vits,
+     numThreads: 1,
+     debug: true,
+   );
+   final config = sherpa_onnx.OfflineTtsConfig(
+     model: modelConfig,
+     maxNumSenetences: 1,
+     ruleFsts: ruleFsts,
+     ruleFars: ruleFars,
+   );
+ 
+   final tts = sherpa_onnx.OfflineTts(config);
+   try {
+     final audio = tts.generate(text: text, sid: sid, speed: speed);
+ 
+     sherpa_onnx.writeWave(
+       filename: outputWav,
+       samples: audio.samples,
+       sampleRate: audio.sampleRate,
+     );
+     print('Saved to $outputWav');
+   } finally {
+     // Release the native engine even if generating or saving fails.
+     tts.free();
+   }
+ }
--- a/dart-api-examples/tts/pubspec.lock 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/pubspec.lock 0 → 100644
查看文件 @e52d32b
+ # Generated by pub
+ # See https://dart.dev/tools/pub/glossary#lockfile
+ packages:
+   args:
+     dependency: "direct main"
+     description:
+       name: args
+       sha256: "7cf60b9f0cc88203c5a190b4cd62a99feea42759a7fa695010eb5de1c0b2252a"
+       url: "https://pub.dev"
+     source: hosted
+     version: "2.5.0"
+   characters:
+     dependency: transitive
+     description:
+       name: characters
+       sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605"
+       url: "https://pub.dev"
+     source: hosted
+     version: "1.3.0"
+   collection:
+     dependency: transitive
+     description:
+       name: collection
+       sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a
+       url: "https://pub.dev"
+     source: hosted
+     version: "1.18.0"
+   ffi:
+     dependency: transitive
+     description:
+       name: ffi
+       sha256: "493f37e7df1804778ff3a53bd691d8692ddf69702cf4c1c1096a2e41b4779e21"
+       url: "https://pub.dev"
+     source: hosted
+     version: "2.1.2"
+   flutter:
+     dependency: transitive
+     description: flutter
+     source: sdk
+     version: "0.0.0"
+   lints:
+     dependency: "direct dev"
+     description:
+       name: lints
+       sha256: cbf8d4b858bb0134ef3ef87841abdf8d63bfc255c266b7bf6b39daa1085c4290
+       url: "https://pub.dev"
+     source: hosted
+     version: "3.0.0"
+   material_color_utilities:
+     dependency: transitive
+     description:
+       name: material_color_utilities
+       sha256: "0e0a020085b65b6083975e499759762399b4475f766c21668c4ecca34ea74e5a"
+       url: "https://pub.dev"
+     source: hosted
+     version: "0.8.0"
+   meta:
+     dependency: transitive
+     description:
+       name: meta
+       sha256: "7687075e408b093f36e6bbf6c91878cc0d4cd10f409506f7bc996f68220b9136"
+       url: "https://pub.dev"
+     source: hosted
+     version: "1.12.0"
+   path:
+     dependency: "direct main"
+     description:
+       name: path
+       sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af"
+       url: "https://pub.dev"
+     source: hosted
+     version: "1.9.0"
+   sherpa_onnx:
+     dependency: "direct main"
+     description:
+       name: sherpa_onnx
+       sha256: e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8
+       url: "https://pub.dev"
+     source: hosted
+     version: "1.9.29"
+   sky_engine:
+     dependency: transitive
+     description: flutter
+     source: sdk
+     version: "0.0.99"
+   vector_math:
+     dependency: transitive
+     description:
+       name: vector_math
+       sha256: "80b3257d1492ce4d091729e3a67a60407d227c27241d6927be0130c98e741803"
+       url: "https://pub.dev"
+     source: hosted
+     version: "2.1.4"
+ sdks:
+   dart: ">=3.4.0 <4.0.0"
+   flutter: ">=3.3.0"
--- a/dart-api-examples/tts/pubspec.yaml 0 → 100644
查看文件 @e52d32b
+++ b/dart-api-examples/tts/pubspec.yaml 0 → 100644
查看文件 @e52d32b
+ name: tts
+ description: A sample command-line application.
+ version: 1.0.0
+ # repository: https://github.com/my_org/my_repo
+ 
+ environment:
+   sdk: ^3.4.0
+ 
+ # Add regular dependencies here.
+ dependencies:
+   sherpa_onnx: ^1.9.29
+   path: ^1.9.0
+   args: ^2.5.0
+ 
+ dev_dependencies:
+   lints: ^3.0.0
--- a/dart-api-examples/tts/run-coqui.sh 0 → 100755
查看文件 @e52d32b
+++ b/dart-api-examples/tts/run-coqui.sh 0 → 100755
查看文件 @e52d32b
+ #!/usr/bin/env bash
+ #
+ # Runs the Coqui TTS example (./bin/coqui.dart) with the
+ # vits-coqui-de-css10 model, downloading the model first if it is missing.
+ 
+ # Abort on the first failing command and echo every command that is run.
+ set -ex
+ 
+ # Fetch the Dart dependencies of this example.
+ dart pub get
+ 
+ 
+ # Please visit
+ # https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+ # to download more models
+ 
+ # Download and unpack the model only when tokens.txt is not present yet;
+ # the archive is removed once it has been extracted.
+ if [[ ! -f ./vits-coqui-de-css10/tokens.txt ]]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
+   tar xvf vits-coqui-de-css10.tar.bz2
+   rm vits-coqui-de-css10.tar.bz2
+ fi
+ 
+ # It is a character-based TTS model, so there is no need to use a lexicon
+ dart run \
+   ./bin/coqui.dart \
+   --model ./vits-coqui-de-css10/model.onnx \
+   --tokens ./vits-coqui-de-css10/tokens.txt \
+   --sid 0 \
+   --speed 0.7 \
+   --text 'Alles hat ein Ende, nur die Wurst hat zwei.' \
+   --output-wav coqui-0.wav
+ 
+ # Show the generated wave file(s).
+ ls -lh *.wav
--- a/dart-api-examples/tts/run-piper.sh 0 → 100755
查看文件 @e52d32b
+++ b/dart-api-examples/tts/run-piper.sh 0 → 100755
查看文件 @e52d32b
+ #!/usr/bin/env bash
+ #
+ # Runs the Piper TTS example (./bin/piper.dart) with the
+ # vits-piper-en_US-libritts_r-medium model, downloading the model first if
+ # it is missing.
+ 
+ # Abort on the first failing command and echo every command that is run.
+ set -ex
+ 
+ # Fetch the Dart dependencies of this example.
+ dart pub get
+ 
+ 
+ # Please visit
+ # https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+ # to download more models
+ 
+ # Download and unpack the model only when tokens.txt is not present yet;
+ # the archive is removed once it has been extracted.
+ if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
+   tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
+   rm vits-piper-en_US-libritts_r-medium.tar.bz2
+ fi
+ 
+ # The spoken sentence describes what this demo does: text to speech.
+ dart run \
+   ./bin/piper.dart \
+   --model ./vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx \
+   --tokens ./vits-piper-en_US-libritts_r-medium/tokens.txt \
+   --data-dir ./vits-piper-en_US-libritts_r-medium/espeak-ng-data \
+   --sid 351 \
+   --speed 1.0 \
+   --text 'How are you doing? This is a text to speech example, using next generation kaldi with piper.' \
+   --output-wav piper-351.wav
+ 
+ # Show the generated wave file(s).
+ ls -lh *.wav
--- a/dart-api-examples/tts/run-zh.sh 0 → 100755
查看文件 @e52d32b
+++ b/dart-api-examples/tts/run-zh.sh 0 → 100755
查看文件 @e52d32b
+ #!/usr/bin/env bash
+ #
+ # Runs the Chinese TTS example (./bin/zh.dart) twice with the
+ # sherpa-onnx-vits-zh-ll model, downloading the model first if it is
+ # missing. The second run additionally passes rule FSTs via --rule-fsts.
+ 
+ # Abort on the first failing command and echo every command that is run.
+ set -ex
+ 
+ # Fetch the Dart dependencies of this example.
+ dart pub get
+ 
+ 
+ # Please visit
+ # https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+ # to download more models
+ 
+ # Download and unpack the model only when tokens.txt is not present yet;
+ # the archive is removed once it has been extracted.
+ if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
+   tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
+   rm sherpa-onnx-vits-zh-ll.tar.bz2
+ fi
+ 
+ # First run: speaker 2, no rule FSTs.
+ dart run \
+   ./bin/zh.dart \
+   --model ./sherpa-onnx-vits-zh-ll/model.onnx \
+   --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
+   --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
+   --dict-dir ./sherpa-onnx-vits-zh-ll/dict \
+   --sid 2 \
+   --speed 1.0 \
+   --text '当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。' \
+   --output-wav zh-jieba-2.wav
+ 
+ # Second run: speaker 3, with the phone, date and number rule FSTs; the
+ # input text contains a date, a time, phone numbers and an amount.
+ dart run \
+   ./bin/zh.dart \
+   --model ./sherpa-onnx-vits-zh-ll/model.onnx \
+   --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
+   --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
+   --dict-dir ./sherpa-onnx-vits-zh-ll/dict \
+   --rule-fsts "./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst" \
+   --sid 3 \
+   --speed 1.0 \
+   --text '今天是2024年6月15号，13点23分。如果有困难，请拨打110或者18920240511。123456块钱。' \
+   --output-wav zh-jieba-3.wav
+ 
+ # Show the generated wave file(s).
+ ls -lh *.wav
--- a/scripts/dart/tts-pubspec.yaml 0 → 100644
查看文件 @e52d32b
+++ b/scripts/dart/tts-pubspec.yaml 0 → 100644
查看文件 @e52d32b
+ name: tts
+ description: A sample command-line application.
+ version: 1.0.0
+ # repository: https://github.com/my_org/my_repo
+ 
+ environment:
+   sdk: ^3.4.0
+ 
+ # Add regular dependencies here.
+ dependencies:
+   sherpa_onnx:
+     path: ../../sherpa-onnx/flutter
+   path: ^1.9.0
+   args: ^2.5.0
+ 
+ dev_dependencies:
+   lints: ^3.0.0
--- a/sherpa-onnx/flutter/lib/sherpa_onnx.dart
查看文件 @e52d32b
+++ b/sherpa-onnx/flutter/lib/sherpa_onnx.dart
查看文件 @e52d32b
@@ -8,6 +8,7 @@ export 'src/offline_stream.dart';
 export 'src/online_recognizer.dart';
 export 'src/online_stream.dart';
 export 'src/speaker_identification.dart';
+ export 'src/tts.dart';
 export 'src/vad.dart';
 export 'src/wave_reader.dart';
 export 'src/wave_writer.dart';
--- a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart
查看文件 @e52d32b
+++ b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart
查看文件 @e52d32b
@@ -2,6 +2,55 @@
 import 'dart:ffi';
 import 'package:ffi/ffi.dart';
 
+ /// FFI struct holding the VITS model settings of the offline TTS config.
+ ///
+ /// The `OfflineTts` factory in `tts.dart` fills these fields from an
+ /// `OfflineTtsVitsModelConfig`; string fields are native UTF-8 pointers.
+ /// NOTE(review): presumably mirrors the C struct of the same name, in which
+ /// case the field order and types must stay in sync with it - confirm.
+ final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
+   external Pointer<Utf8> model;
+   external Pointer<Utf8> lexicon;
+   external Pointer<Utf8> tokens;
+   external Pointer<Utf8> dataDir;
+ 
+   @Float()
+   external double noiseScale;
+ 
+   @Float()
+   external double noiseScaleW;
+ 
+   @Float()
+   external double lengthScale;
+ 
+   external Pointer<Utf8> dictDir;
+ }
+ 
+ /// FFI struct holding the model-level settings of the offline TTS config.
+ ///
+ /// Embeds the VITS settings by value. The `OfflineTts` factory in
+ /// `tts.dart` fills it from an `OfflineTtsModelConfig`.
+ /// NOTE(review): presumably mirrors the C struct of the same name - confirm.
+ final class SherpaOnnxOfflineTtsModelConfig extends Struct {
+   external SherpaOnnxOfflineTtsVitsModelConfig vits;
+   @Int32()
+   external int numThreads;
+ 
+   // Written as 1 or 0 from a Dart bool by the `OfflineTts` factory.
+   @Int32()
+   external int debug;
+ 
+   external Pointer<Utf8> provider;
+ }
+ 
+ /// FFI struct holding the top-level offline TTS configuration.
+ ///
+ /// A pointer to this struct is the only argument of
+ /// `SherpaOnnxCreateOfflineTtsNative`. The `OfflineTts` factory in
+ /// `tts.dart` fills it from an `OfflineTtsConfig`.
+ /// NOTE(review): presumably mirrors the C struct of the same name - confirm.
+ final class SherpaOnnxOfflineTtsConfig extends Struct {
+   external SherpaOnnxOfflineTtsModelConfig model;
+   external Pointer<Utf8> ruleFsts;
+ 
+   // The misspelled name is also used by the public Dart `OfflineTtsConfig`.
+   @Int32()
+   external int maxNumSenetences;
+ 
+   external Pointer<Utf8> ruleFars;
+ }
+ 
+ /// FFI struct describing the audio returned by the native generate calls.
+ ///
+ /// `tts.dart` reads [n] floats starting at [samples] together with
+ /// [sampleRate], and then hands the struct back to the native library via
+ /// `destroyOfflineTtsGeneratedAudio`.
+ final class SherpaOnnxGeneratedAudio extends Struct {
+   external Pointer<Float> samples;
+ 
+   // Number of floats read from `samples`.
+   @Int32()
+   external int n;
+ 
+   @Int32()
+   external int sampleRate;
+ }
+ 
 final class SherpaOnnxFeatureConfig extends Struct {
   @Int32()
   external int sampleRate;
@@ -218,6 +267,8 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
   external Pointer<Utf8> provider;
 }
 
+ /// Opaque native offline TTS engine; only ever handled through a [Pointer].
+ final class SherpaOnnxOfflineTts extends Opaque {}
+ 
 final class SherpaOnnxCircularBuffer extends Opaque {}
 
 final class SherpaOnnxVoiceActivityDetector extends Opaque {}
@@ -234,6 +285,60 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
 
 final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
 
+ // Signatures of the native offline TTS functions. Each function has a
+ // `...Native` typedef written with FFI types and a Dart-side typedef
+ // written with Dart types; `SherpaOnnxBindings.init` looks the symbols up.
+ 
+ // Creates an engine from a config struct and returns its handle. The native
+ // and the Dart signature are identical because only pointers are involved.
+ typedef SherpaOnnxCreateOfflineTtsNative = Pointer<SherpaOnnxOfflineTts>
+     Function(Pointer<SherpaOnnxOfflineTtsConfig>);
+ 
+ typedef SherpaOnnxCreateOfflineTts = SherpaOnnxCreateOfflineTtsNative;
+ 
+ // Destroys an engine handle.
+ typedef SherpaOnnxDestroyOfflineTtsNative = Void Function(
+     Pointer<SherpaOnnxOfflineTts>);
+ 
+ typedef SherpaOnnxDestroyOfflineTts = void Function(
+     Pointer<SherpaOnnxOfflineTts>);
+ 
+ // Returns the sample rate of the engine as a 32-bit integer.
+ typedef SherpaOnnxOfflineTtsSampleRateNative = Int32 Function(
+     Pointer<SherpaOnnxOfflineTts>);
+ 
+ typedef SherpaOnnxOfflineTtsSampleRate = int Function(
+     Pointer<SherpaOnnxOfflineTts>);
+ 
+ // Returns the number of speakers of the engine as a 32-bit integer.
+ typedef SherpaOnnxOfflineTtsNumSpeakersNative = Int32 Function(
+     Pointer<SherpaOnnxOfflineTts>);
+ 
+ typedef SherpaOnnxOfflineTtsNumSpeakers = int Function(
+     Pointer<SherpaOnnxOfflineTts>);
+ 
+ // Generates audio; arguments are (engine, UTF-8 text, speaker id, speed).
+ typedef SherpaOnnxOfflineTtsGenerateNative = Pointer<SherpaOnnxGeneratedAudio>
+     Function(Pointer<SherpaOnnxOfflineTts>, Pointer<Utf8>, Int32, Float);
+ 
+ typedef SherpaOnnxOfflineTtsGenerate = Pointer<SherpaOnnxGeneratedAudio>
+     Function(Pointer<SherpaOnnxOfflineTts>, Pointer<Utf8>, int, double);
+ 
+ // Releases an audio struct returned by one of the generate functions.
+ typedef SherpaOnnxDestroyOfflineTtsGeneratedAudioNative = Void Function(
+     Pointer<SherpaOnnxGeneratedAudio>);
+ 
+ typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio = void Function(
+     Pointer<SherpaOnnxGeneratedAudio>);
+ 
+ // Callback invoked by the native side with (samples, number of samples).
+ typedef SherpaOnnxGeneratedAudioCallbackNative = Void Function(
+     Pointer<Float>, Int32);
+ 
+ // Same as generate, with a trailing pointer to the callback above.
+ typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative
+     = Pointer<SherpaOnnxGeneratedAudio> Function(
+         Pointer<SherpaOnnxOfflineTts>,
+         Pointer<Utf8>,
+         Int32,
+         Float,
+         Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>);
+ 
+ typedef SherpaOnnxOfflineTtsGenerateWithCallback
+     = Pointer<SherpaOnnxGeneratedAudio> Function(
+         Pointer<SherpaOnnxOfflineTts>,
+         Pointer<Utf8>,
+         int,
+         double,
+         Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>);
+ 
 typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer>
     Function(Pointer<SherpaOnnxOfflineRecognizerConfig>);
 
@@ -608,6 +713,16 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
 typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
 
 class SherpaOnnxBindings {
+   static SherpaOnnxCreateOfflineTts? createOfflineTts;
+   static SherpaOnnxDestroyOfflineTts? destroyOfflineTts;
+   static SherpaOnnxOfflineTtsSampleRate? offlineTtsSampleRate;
+   static SherpaOnnxOfflineTtsNumSpeakers? offlineTtsNumSpeakers;
+   static SherpaOnnxOfflineTtsGenerate? offlineTtsGenerate;
+   static SherpaOnnxDestroyOfflineTtsGeneratedAudio?
+       destroyOfflineTtsGeneratedAudio;
+   static SherpaOnnxOfflineTtsGenerateWithCallback?
+       offlineTtsGenerateWithCallback;
+ 
   static CreateOfflineRecognizer? createOfflineRecognizer;
   static DestroyOfflineRecognizer? destroyOfflineRecognizer;
   static CreateOfflineStream? createOfflineStream;
@@ -740,6 +855,43 @@ class SherpaOnnxBindings {
   static SherpaOnnxFreeWave? freeWave;
 
   static void init(DynamicLibrary dynamicLibrary) {
+     createOfflineTts ??= dynamicLibrary
+         .lookup<NativeFunction<SherpaOnnxCreateOfflineTtsNative>>(
+             'SherpaOnnxCreateOfflineTts')
+         .asFunction();
+ 
+     destroyOfflineTts ??= dynamicLibrary
+         .lookup<NativeFunction<SherpaOnnxDestroyOfflineTtsNative>>(
+             'SherpaOnnxDestroyOfflineTts')
+         .asFunction();
+ 
+     offlineTtsSampleRate ??= dynamicLibrary
+         .lookup<NativeFunction<SherpaOnnxOfflineTtsSampleRateNative>>(
+             'SherpaOnnxOfflineTtsSampleRate')
+         .asFunction();
+ 
+     offlineTtsNumSpeakers ??= dynamicLibrary
+         .lookup<NativeFunction<SherpaOnnxOfflineTtsNumSpeakersNative>>(
+             'SherpaOnnxOfflineTtsNumSpeakers')
+         .asFunction();
+ 
+     offlineTtsGenerate ??= dynamicLibrary
+         .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateNative>>(
+             'SherpaOnnxOfflineTtsGenerate')
+         .asFunction();
+ 
+     destroyOfflineTtsGeneratedAudio ??= dynamicLibrary
+         .lookup<
+                 NativeFunction<
+                     SherpaOnnxDestroyOfflineTtsGeneratedAudioNative>>(
+             'SherpaOnnxDestroyOfflineTtsGeneratedAudio')
+         .asFunction();
+ 
+     offlineTtsGenerateWithCallback ??= dynamicLibrary
+         .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateWithCallbackNative>>(
+             'SherpaOnnxOfflineTtsGenerateWithCallback')
+         .asFunction();
+ 
     createOfflineRecognizer ??= dynamicLibrary
         .lookup<NativeFunction<CreateOfflineRecognizerNative>>(
             'CreateOfflineRecognizer')
--- a/sherpa-onnx/flutter/lib/src/tts.dart 0 → 100644
查看文件 @e52d32b
+++ b/sherpa-onnx/flutter/lib/src/tts.dart 0 → 100644
查看文件 @e52d32b
+ // Copyright (c)  2024  Xiaomi Corporation
+ import 'dart:convert';
+ import 'dart:ffi';
+ import 'dart:typed_data';
+ 
+ import 'package:ffi/ffi.dart';
+ 
+ import './sherpa_onnx_bindings.dart';
+ 
+ /// Settings of the VITS model used by [OfflineTts].
+ class OfflineTtsVitsModelConfig {
+   /// Creates the settings; only [model] and [tokens] are required.
+   const OfflineTtsVitsModelConfig({
+     required this.model,
+     this.lexicon = '',
+     required this.tokens,
+     this.dataDir = '',
+     this.noiseScale = 0.667,
+     this.noiseScaleW = 0.8,
+     this.lengthScale = 1.0,
+     this.dictDir = '',
+   });
+ 
+   /// Path to the ONNX model file.
+   final String model;
+ 
+   /// Path to the lexicon file; empty when the model does not use one.
+   final String lexicon;
+ 
+   /// Path to the tokens file.
+   final String tokens;
+ 
+   /// Path to the espeak-ng data directory; empty when not used.
+   final String dataDir;
+ 
+   /// Value forwarded unchanged as the native `noiseScale`.
+   final double noiseScale;
+ 
+   /// Value forwarded unchanged as the native `noiseScaleW`.
+   final double noiseScaleW;
+ 
+   /// Value forwarded unchanged as the native `lengthScale`.
+   final double lengthScale;
+ 
+   /// Path to the jieba dict directory; empty when not used.
+   final String dictDir;
+ 
+   @override
+   String toString() => 'OfflineTtsVitsModelConfig('
+       'model: $model, lexicon: $lexicon, tokens: $tokens, '
+       'dataDir: $dataDir, noiseScale: $noiseScale, '
+       'noiseScaleW: $noiseScaleW, lengthScale: $lengthScale, '
+       'dictDir: $dictDir)';
+ }
+ 
+ /// Model-level settings used by [OfflineTts].
+ class OfflineTtsModelConfig {
+   /// Creates the settings; only [vits] is required.
+   const OfflineTtsModelConfig({
+     required this.vits,
+     this.numThreads = 1,
+     this.debug = true,
+     this.provider = 'cpu',
+   });
+ 
+   /// Settings of the VITS model to load.
+   final OfflineTtsVitsModelConfig vits;
+ 
+   /// Number of threads handed to the native library.
+   final int numThreads;
+ 
+   /// Debug flag; handed to the native library as 1 or 0.
+   final bool debug;
+ 
+   /// Provider name handed to the native library.
+   final String provider;
+ 
+   @override
+   String toString() =>
+       'OfflineTtsModelConfig(vits: $vits, numThreads: $numThreads, '
+       'debug: $debug, provider: $provider)';
+ }
+ 
+ /// Top-level configuration used to create an [OfflineTts].
+ class OfflineTtsConfig {
+   /// Creates the configuration; only [model] is required.
+   const OfflineTtsConfig({
+     required this.model,
+     this.ruleFsts = '',
+     this.maxNumSenetences = 1,
+     this.ruleFars = '',
+   });
+ 
+   /// Model-level settings.
+   final OfflineTtsModelConfig model;
+ 
+   /// Rule FST path(s) handed to the native library; empty for none.
+   final String ruleFsts;
+ 
+   /// Value handed to the native library under the same (misspelled) name.
+   ///
+   /// The spelling is part of the public API and is therefore kept.
+   final int maxNumSenetences;
+ 
+   /// Rule FAR path(s) handed to the native library; empty for none.
+   final String ruleFars;
+ 
+   @override
+   String toString() =>
+       'OfflineTtsConfig(model: $model, ruleFsts: $ruleFsts, '
+       'maxNumSenetences: $maxNumSenetences, ruleFars: $ruleFars)';
+ }
+ 
+ /// Audio produced by [OfflineTts]: the samples and their sample rate.
+ class GeneratedAudio {
+   /// Wraps already generated [samples] and the matching [sampleRate].
+   GeneratedAudio({
+     required this.samples,
+     required this.sampleRate,
+   });
+ 
+   /// The generated samples as 32-bit floats.
+   final Float32List samples;
+ 
+   /// Sample rate of [samples] as reported by the native library.
+   final int sampleRate;
+ }
+ 
+ /// Offline (non-streaming) text-to-speech engine backed by the native
+ /// sherpa-onnx library.
+ ///
+ /// Create it from an [OfflineTtsConfig], call [generate] or
+ /// [generateWithCallback], and call [free] when it is no longer needed.
+ class OfflineTts {
+   OfflineTts._({required this.ptr, required this.config});
+ 
+   /// Creates a native TTS engine from [config].
+   ///
+   /// The user is responsible for calling [free] on the returned instance
+   /// to avoid a memory leak. If the native library is not initialized or
+   /// fails to create the engine, [ptr] is `nullptr`; in that case
+   /// [generate] and [generateWithCallback] return empty audio.
+   factory OfflineTts(OfflineTtsConfig config) {
+     final c = calloc<SherpaOnnxOfflineTtsConfig>();
+     c.ref.model.vits.model = config.model.vits.model.toNativeUtf8();
+     c.ref.model.vits.lexicon = config.model.vits.lexicon.toNativeUtf8();
+     c.ref.model.vits.tokens = config.model.vits.tokens.toNativeUtf8();
+     c.ref.model.vits.dataDir = config.model.vits.dataDir.toNativeUtf8();
+     c.ref.model.vits.noiseScale = config.model.vits.noiseScale;
+     c.ref.model.vits.noiseScaleW = config.model.vits.noiseScaleW;
+     c.ref.model.vits.lengthScale = config.model.vits.lengthScale;
+     c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8();
+ 
+     c.ref.model.numThreads = config.model.numThreads;
+     c.ref.model.debug = config.model.debug ? 1 : 0;
+     c.ref.model.provider = config.model.provider.toNativeUtf8();
+ 
+     c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
+     c.ref.maxNumSenetences = config.maxNumSenetences;
+     c.ref.ruleFars = config.ruleFars.toNativeUtf8();
+ 
+     try {
+       final ptr = SherpaOnnxBindings.createOfflineTts?.call(c) ?? nullptr;
+       return OfflineTts._(ptr: ptr, config: config);
+     } finally {
+       // The native strings and the config struct are only needed while the
+       // engine is being created.
+       calloc.free(c.ref.ruleFars);
+       calloc.free(c.ref.ruleFsts);
+       calloc.free(c.ref.model.provider);
+       calloc.free(c.ref.model.vits.dictDir);
+       calloc.free(c.ref.model.vits.dataDir);
+       calloc.free(c.ref.model.vits.tokens);
+       calloc.free(c.ref.model.vits.lexicon);
+       calloc.free(c.ref.model.vits.model);
+       // Free the struct itself last; it used to be leaked.
+       calloc.free(c);
+     }
+   }
+ 
+   /// Releases the native engine.
+   ///
+   /// Safe to call more than once; later calls do nothing.
+   void free() {
+     if (ptr == nullptr) {
+       return;
+     }
+     SherpaOnnxBindings.destroyOfflineTts?.call(ptr);
+     ptr = nullptr;
+   }
+ 
+   /// Generates audio for [text] with speaker [sid] at the given [speed].
+   ///
+   /// Returns empty audio with a sample rate of 0 if the engine is not
+   /// available (creation failed or [free] was called) or if the native
+   /// call returns no result.
+   GeneratedAudio generate(
+       {required String text, int sid = 0, double speed = 1.0}) {
+     if (ptr == nullptr) {
+       return _emptyAudio();
+     }
+ 
+     final textPtr = text.toNativeUtf8();
+     try {
+       final p = SherpaOnnxBindings.offlineTtsGenerate
+               ?.call(ptr, textPtr, sid, speed) ??
+           nullptr;
+       return _copyAndDestroy(p);
+     } finally {
+       calloc.free(textPtr);
+     }
+   }
+ 
+   /// Same as [generate], additionally passing chunks of samples to
+   /// [callback] whenever the native library invokes its callback.
+   ///
+   /// Every chunk handed to [callback] is a copy owned by Dart. Returns
+   /// empty audio with a sample rate of 0 if the engine is not available or
+   /// if the native call returns no result.
+   GeneratedAudio generateWithCallback(
+       {required String text,
+       int sid = 0,
+       double speed = 1.0,
+       required void Function(Float32List samples) callback}) {
+     if (ptr == nullptr) {
+       return _emptyAudio();
+     }
+ 
+     // see
+     // https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
+     // https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
+     // https://github.com/dart-lang/sdk/blob/main/tests/ffi/isolate_local_function_callbacks_test.dart#L46
+     final wrapper =
+         NativeCallable<SherpaOnnxGeneratedAudioCallbackNative>.isolateLocal(
+             (Pointer<Float> samples, int n) {
+       // Copy the chunk so that the callback never sees native memory.
+       callback(Float32List.fromList(samples.asTypedList(n)));
+     });
+ 
+     final textPtr = text.toNativeUtf8();
+     try {
+       final p = SherpaOnnxBindings.offlineTtsGenerateWithCallback
+               ?.call(ptr, textPtr, sid, speed, wrapper.nativeFunction) ??
+           nullptr;
+       return _copyAndDestroy(p);
+     } finally {
+       calloc.free(textPtr);
+       wrapper.close();
+     }
+   }
+ 
+   /// Sample rate reported by the native engine, or 0 if it is unavailable.
+   int get sampleRate => ptr == nullptr
+       ? 0
+       : SherpaOnnxBindings.offlineTtsSampleRate?.call(ptr) ?? 0;
+ 
+   /// Number of speakers reported by the native engine, or 0 if it is
+   /// unavailable.
+   int get numSpeakers => ptr == nullptr
+       ? 0
+       : SherpaOnnxBindings.offlineTtsNumSpeakers?.call(ptr) ?? 0;
+ 
+   /// Result used when no audio could be generated.
+   static GeneratedAudio _emptyAudio() =>
+       GeneratedAudio(samples: Float32List(0), sampleRate: 0);
+ 
+   /// Copies the native audio at [p] into Dart memory and releases [p].
+   static GeneratedAudio _copyAndDestroy(Pointer<SherpaOnnxGeneratedAudio> p) {
+     if (p == nullptr) {
+       return _emptyAudio();
+     }
+ 
+     try {
+       final n = p.ref.n;
+       // Do not build a view over the native buffer when it has no samples.
+       final samples = n > 0
+           ? Float32List.fromList(p.ref.samples.asTypedList(n))
+           : Float32List(0);
+       return GeneratedAudio(samples: samples, sampleRate: p.ref.sampleRate);
+     } finally {
+       SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);
+     }
+   }
+ 
+   /// Handle of the native engine; `nullptr` once [free] has been called.
+   Pointer<SherpaOnnxOfflineTts> ptr;
+ 
+   /// The configuration this engine was created from.
+   OfflineTtsConfig config;
+ }