Fangjun Kuang
Committed by GitHub

Add TTS API and examples for Dart (#1010)

@@ -4,6 +4,22 @@ set -ex @@ -4,6 +4,22 @@ set -ex
4 4
5 cd dart-api-examples 5 cd dart-api-examples
6 6
  7 +pushd tts
  8 +
  9 +echo '----------piper tts----------'
  10 +./run-piper.sh
  11 +rm -rf vits-piper-*
  12 +
  13 +echo '----------coqui tts----------'
  14 +./run-coqui.sh
  15 +rm -rf vits-coqui-*
  16 +
  17 +echo '----------zh tts----------'
  18 +./run-zh.sh
  19 +rm -rf sherpa-onnx-*
  20 +
  21 +popd # tts
  22 +
7 pushd streaming-asr 23 pushd streaming-asr
8 24
9 echo '----------streaming zipformer ctc HLG----------' 25 echo '----------streaming zipformer ctc HLG----------'
@@ -92,5 +92,6 @@ jobs: @@ -92,5 +92,6 @@ jobs:
92 cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml 92 cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
93 cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml 93 cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
94 cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml 94 cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
  95 + cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml
95 96
96 .github/scripts/test-dart.sh 97 .github/scripts/test-dart.sh
1 # Introduction 1 # Introduction
2 2
3 This folder contains examples for streaming ASR with Dart API. 3 This folder contains examples for streaming ASR with Dart API.
  4 +
4 | File | Description| 5 | File | Description|
5 |------|------------| 6 |------|------------|
6 |[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)| 7 |[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
  1 +# https://dart.dev/guides/libraries/private-files
  2 +# Created by `dart pub`
  3 +.dart_tool/
  1 +## 1.0.0
  2 +
  3 +- Initial version.
  1 +# Introduction
  2 +
  3 +This folder contains examples for text to speech with Dart API.
  4 +
  5 +| File | Description|
  6 +|------|------------|
  7 +|[./bin/piper.dart](./bin/piper.dart)| Use a Piper tts model for text to speech. See [./run-piper.sh](./run-piper.sh)|
  8 +|[./bin/coqui.dart](./bin/coqui.dart)| Use a Coqui tts model for text to speech. See [./run-coqui.sh](./run-coqui.sh)|
  9 +|[./bin/zh.dart](./bin/zh.dart)| Use a Chinese VITS tts model for text to speech. See [./run-zh.sh](./run-zh.sh)|
  10 +
  1 +# This file configures the static analysis results for your project (errors,
  2 +# warnings, and lints).
  3 +#
  4 +# This enables the 'recommended' set of lints from `package:lints`.
  5 +# This set helps identify many issues that may lead to problems when running
  6 +# or consuming Dart code, and enforces writing Dart using a single, idiomatic
  7 +# style and format.
  8 +#
  9 +# If you want a smaller set of lints you can change this to specify
  10 +# 'package:lints/core.yaml'. These are just the most critical lints
  11 +# (the recommended set includes the core lints).
  12 +# The core lints are also what is used by pub.dev for scoring packages.
  13 +
  14 +include: package:lints/recommended.yaml
  15 +
  16 +# Uncomment the following section to specify additional rules.
  17 +
  18 +# linter:
  19 +# rules:
  20 +# - camel_case_types
  21 +
  22 +# analyzer:
  23 +# exclude:
  24 +# - path/to/excluded/files/**
  25 +
  26 +# For more information about the core and recommended set of lints, see
  27 +# https://dart.dev/go/core-lints
  28 +
  29 +# For additional information about configuring this file, see
  30 +# https://dart.dev/guides/language/analysis-options
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:io';
  3 +import 'dart:typed_data';
  4 +
  5 +import 'package:args/args.dart';
  6 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  7 +
  8 +import './init.dart';
  9 +
/// Synthesizes speech with a VITS model that needs only a model file and a
/// token table, then writes the result to a wave file.
///
/// Required options are `--model`, `--tokens`, `--text` and `--output-wav`.
/// `--speed` defaults to 1.0 and `--sid` to 0. The usage text is printed and
/// the process exits with status 1 when the command line cannot be parsed or
/// a required option is missing.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  // parse() throws a FormatException for unknown options or missing values;
  // report it together with the usage instead of an unhandled stack trace.
  final ArgResults res;
  try {
    res = parser.parse(arguments);
  } on FormatException catch (e) {
    stderr.writeln(e.message);
    print(parser.usage);
    exit(1);
  }

  if (res['model'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // lengthScale is computed as 1 / speed below, so zero, negative, NaN and
  // infinite values fall back to the normal speed.
  final parsedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed = parsedSpeed.isFinite && parsedSpeed > 0 ? parsedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final audio = tts.generate(text: text, sid: sid, speed: speed);
  tts.free();

  // generate() returns an empty sample list when the native call fails.
  if (audio.samples.isEmpty) {
    stderr.writeln('Failed to generate audio');
    exit(1);
  }

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}
  1 +../../vad/bin/init.dart
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:io';
  3 +import 'dart:typed_data';
  4 +
  5 +import 'package:args/args.dart';
  6 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  7 +
  8 +import './init.dart';
  9 +
/// Synthesizes speech with a VITS model that uses an espeak-ng data
/// directory, reporting each chunk of samples as it is produced, then writes
/// the complete result to a wave file.
///
/// Required options are `--model`, `--tokens`, `--data-dir`, `--text` and
/// `--output-wav`. `--speed` defaults to 1.0 and `--sid` to 0. The usage text
/// is printed and the process exits with status 1 when the command line
/// cannot be parsed or a required option is missing.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('data-dir', help: 'Path to espeak-ng-data directory')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  // parse() throws a FormatException for unknown options or missing values;
  // report it together with the usage instead of an unhandled stack trace.
  final ArgResults res;
  try {
    res = parser.parse(arguments);
  } on FormatException catch (e) {
    stderr.writeln(e.message);
    print(parser.usage);
    exit(1);
  }

  if (res['model'] == null ||
      res['tokens'] == null ||
      res['data-dir'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final tokens = res['tokens'] as String;
  final dataDir = res['data-dir'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // lengthScale is computed as 1 / speed below, so zero, negative, NaN and
  // infinite values fall back to the normal speed.
  final parsedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed = parsedSpeed.isFinite && parsedSpeed > 0 ? parsedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    tokens: tokens,
    dataDir: dataDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final audio = tts.generateWithCallback(
    text: text,
    sid: sid,
    speed: speed,
    callback: (Float32List samples) {
      print('${samples.length} samples received');
      // You can play samples in a separate thread/isolate
    },
  );
  tts.free();

  // generateWithCallback() returns an empty sample list when the native call
  // fails.
  if (audio.samples.isEmpty) {
    stderr.writeln('Failed to generate audio');
    exit(1);
  }

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:io';
  3 +import 'dart:typed_data';
  4 +
  5 +import 'package:args/args.dart';
  6 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  7 +
  8 +import './init.dart';
  9 +
/// Synthesizes speech with a lexicon-based VITS model, optionally using a
/// jieba dictionary directory and rule FSTs/FARs, then writes the result to
/// a wave file.
///
/// Required options are `--model`, `--lexicon`, `--tokens`, `--text` and
/// `--output-wav`. `--dict-dir`, `--rule-fsts` and `--rule-fars` default to
/// the empty string, `--speed` to 1.0 and `--sid` to 0. The usage text is
/// printed and the process exits with status 1 when the command line cannot
/// be parsed or a required option is missing.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the ONNX model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('lexicon', help: 'Path to lexicon.txt')
    ..addOption(
      'dict-dir',
      help: 'Path to jieba dict directory',
      defaultsTo: '',
    )
    ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '')
    ..addOption('rule-fars', help: 'Path to rule fars', defaultsTo: '')
    ..addOption('text', help: 'Text to generate TTS for')
    ..addOption('output-wav', help: 'Filename to save the generated audio')
    ..addOption('speed', help: 'Speech speed', defaultsTo: '1.0')
    ..addOption(
      'sid',
      help: 'Speaker ID to select. Used only for multi-speaker TTS',
      defaultsTo: '0',
    );

  // parse() throws a FormatException for unknown options or missing values;
  // report it together with the usage instead of an unhandled stack trace.
  final ArgResults res;
  try {
    res = parser.parse(arguments);
  } on FormatException catch (e) {
    stderr.writeln(e.message);
    print(parser.usage);
    exit(1);
  }

  if (res['model'] == null ||
      res['lexicon'] == null ||
      res['tokens'] == null ||
      res['output-wav'] == null ||
      res['text'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  final lexicon = res['lexicon'] as String;
  final tokens = res['tokens'] as String;
  final dictDir = res['dict-dir'] as String;
  final ruleFsts = res['rule-fsts'] as String;
  final ruleFars = res['rule-fars'] as String;
  final text = res['text'] as String;
  final outputWav = res['output-wav'] as String;
  final sid = int.tryParse(res['sid'] as String) ?? 0;

  // lengthScale is computed as 1 / speed below, so zero, negative, NaN and
  // infinite values fall back to the normal speed.
  final parsedSpeed = double.tryParse(res['speed'] as String) ?? 1.0;
  final speed = parsedSpeed.isFinite && parsedSpeed > 0 ? parsedSpeed : 1.0;

  final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
    model: model,
    lexicon: lexicon,
    tokens: tokens,
    dictDir: dictDir,
    lengthScale: 1 / speed,
  );

  final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
    vits: vits,
    numThreads: 1,
    debug: true,
  );
  final config = sherpa_onnx.OfflineTtsConfig(
    model: modelConfig,
    maxNumSenetences: 1,
    ruleFsts: ruleFsts,
    ruleFars: ruleFars,
  );

  final tts = sherpa_onnx.OfflineTts(config);
  final audio = tts.generate(text: text, sid: sid, speed: speed);
  tts.free();

  // generate() returns an empty sample list when the native call fails.
  if (audio.samples.isEmpty) {
    stderr.writeln('Failed to generate audio');
    exit(1);
  }

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: audio.samples,
    sampleRate: audio.sampleRate,
  );
  print('Saved to $outputWav');
}
  1 +# Generated by pub
  2 +# See https://dart.dev/tools/pub/glossary#lockfile
  3 +packages:
  4 + args:
  5 + dependency: "direct main"
  6 + description:
  7 + name: args
  8 + sha256: "7cf60b9f0cc88203c5a190b4cd62a99feea42759a7fa695010eb5de1c0b2252a"
  9 + url: "https://pub.dev"
  10 + source: hosted
  11 + version: "2.5.0"
  12 + characters:
  13 + dependency: transitive
  14 + description:
  15 + name: characters
  16 + sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605"
  17 + url: "https://pub.dev"
  18 + source: hosted
  19 + version: "1.3.0"
  20 + collection:
  21 + dependency: transitive
  22 + description:
  23 + name: collection
  24 + sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a
  25 + url: "https://pub.dev"
  26 + source: hosted
  27 + version: "1.18.0"
  28 + ffi:
  29 + dependency: transitive
  30 + description:
  31 + name: ffi
  32 + sha256: "493f37e7df1804778ff3a53bd691d8692ddf69702cf4c1c1096a2e41b4779e21"
  33 + url: "https://pub.dev"
  34 + source: hosted
  35 + version: "2.1.2"
  36 + flutter:
  37 + dependency: transitive
  38 + description: flutter
  39 + source: sdk
  40 + version: "0.0.0"
  41 + lints:
  42 + dependency: "direct dev"
  43 + description:
  44 + name: lints
  45 + sha256: cbf8d4b858bb0134ef3ef87841abdf8d63bfc255c266b7bf6b39daa1085c4290
  46 + url: "https://pub.dev"
  47 + source: hosted
  48 + version: "3.0.0"
  49 + material_color_utilities:
  50 + dependency: transitive
  51 + description:
  52 + name: material_color_utilities
  53 + sha256: "0e0a020085b65b6083975e499759762399b4475f766c21668c4ecca34ea74e5a"
  54 + url: "https://pub.dev"
  55 + source: hosted
  56 + version: "0.8.0"
  57 + meta:
  58 + dependency: transitive
  59 + description:
  60 + name: meta
  61 + sha256: "7687075e408b093f36e6bbf6c91878cc0d4cd10f409506f7bc996f68220b9136"
  62 + url: "https://pub.dev"
  63 + source: hosted
  64 + version: "1.12.0"
  65 + path:
  66 + dependency: "direct main"
  67 + description:
  68 + name: path
  69 + sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af"
  70 + url: "https://pub.dev"
  71 + source: hosted
  72 + version: "1.9.0"
  73 + sherpa_onnx:
  74 + dependency: "direct main"
  75 + description:
  76 + name: sherpa_onnx
  77 + sha256: e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8
  78 + url: "https://pub.dev"
  79 + source: hosted
  80 + version: "1.9.29"
  81 + sky_engine:
  82 + dependency: transitive
  83 + description: flutter
  84 + source: sdk
  85 + version: "0.0.99"
  86 + vector_math:
  87 + dependency: transitive
  88 + description:
  89 + name: vector_math
  90 + sha256: "80b3257d1492ce4d091729e3a67a60407d227c27241d6927be0130c98e741803"
  91 + url: "https://pub.dev"
  92 + source: hosted
  93 + version: "2.1.4"
  94 +sdks:
  95 + dart: ">=3.4.0 <4.0.0"
  96 + flutter: ">=3.3.0"
  1 +name: tts
  2 +description: A sample command-line application.
  3 +version: 1.0.0
  4 +# repository: https://github.com/my_org/my_repo
  5 +
  6 +environment:
  7 + sdk: ^3.4.0
  8 +
  9 +# Add regular dependencies here.
  10 +dependencies:
  11 + sherpa_onnx: ^1.9.29
  12 + path: ^1.9.0
  13 + args: ^2.5.0
  14 +
  15 +dev_dependencies:
  16 + lints: ^3.0.0
#!/usr/bin/env bash
#
# Downloads the vits-coqui-de-css10 model when it is not already present and
# runs ./bin/coqui.dart to synthesize a sample sentence into coqui-0.wav.

# -e: stop at the first failing command; -x: echo each command before running.
set -ex

# Resolve the Dart package dependencies before running the example.
dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

# tokens.txt is used as the marker that the model has already been extracted.
if [[ ! -f ./vits-coqui-de-css10/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  tar xvf vits-coqui-de-css10.tar.bz2
  # The archive is no longer needed once extracted.
  rm vits-coqui-de-css10.tar.bz2
fi

# It is a character-based TTS model, so there is no need to use a lexicon
dart run \
  ./bin/coqui.dart \
  --model ./vits-coqui-de-css10/model.onnx \
  --tokens ./vits-coqui-de-css10/tokens.txt \
  --sid 0 \
  --speed 0.7 \
  --text 'Alles hat ein Ende, nur die Wurst hat zwei.' \
  --output-wav coqui-0.wav

# List the generated wave files.
ls -lh *.wav
#!/usr/bin/env bash
#
# Downloads the vits-piper-en_US-libritts_r-medium model when it is not
# already present and runs ./bin/piper.dart to synthesize a sample sentence
# into piper-351.wav.

# -e: stop at the first failing command; -x: echo each command before running.
set -ex

# Resolve the Dart package dependencies before running the example.
dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

# tokens.txt is used as the marker that the model has already been extracted.
if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
  # The archive is no longer needed once extracted.
  rm vits-piper-en_US-libritts_r-medium.tar.bz2
fi

# --data-dir points at the espeak-ng-data directory shipped inside the model
# archive; --sid selects speaker 351.
dart run \
  ./bin/piper.dart \
  --model ./vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx \
  --tokens ./vits-piper-en_US-libritts_r-medium/tokens.txt \
  --data-dir ./vits-piper-en_US-libritts_r-medium/espeak-ng-data \
  --sid 351 \
  --speed 1.0 \
  --text 'How are you doing? This is a speech to text example, using next generation kaldi with piper.' \
  --output-wav piper-351.wav

# List the generated wave files.
ls -lh *.wav
#!/usr/bin/env bash
#
# Downloads the sherpa-onnx-vits-zh-ll model when it is not already present
# and runs ./bin/zh.dart twice: once without and once with rule FSTs.

# -e: stop at the first failing command; -x: echo each command before running.
set -ex

# Resolve the Dart package dependencies before running the example.
dart pub get


# Please visit
# https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
# to download more models

# tokens.txt is used as the marker that the model has already been extracted.
if [[ ! -f ./sherpa-onnx-vits-zh-ll/tokens.txt ]]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
  tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
  # The archive is no longer needed once extracted.
  rm sherpa-onnx-vits-zh-ll.tar.bz2
fi

# First run: lexicon and jieba dictionary only, speaker 2.
dart run \
  ./bin/zh.dart \
  --model ./sherpa-onnx-vits-zh-ll/model.onnx \
  --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
  --dict-dir ./sherpa-onnx-vits-zh-ll/dict \
  --sid 2 \
  --speed 1.0 \
  --text '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。' \
  --output-wav zh-jieba-2.wav

# Second run: additionally passes a comma-separated list of rule FSTs
# (phone, date, number) for a text containing dates and numbers, speaker 3.
dart run \
  ./bin/zh.dart \
  --model ./sherpa-onnx-vits-zh-ll/model.onnx \
  --lexicon ./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --tokens ./sherpa-onnx-vits-zh-ll/tokens.txt \
  --dict-dir ./sherpa-onnx-vits-zh-ll/dict \
  --rule-fsts "./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst" \
  --sid 3 \
  --speed 1.0 \
  --text '今天是2024年6月15号,13点23分。如果有困难,请拨打110或者18920240511。123456块钱。' \
  --output-wav zh-jieba-3.wav

# List the generated wave files.
ls -lh *.wav
  1 +name: tts
  2 +description: A sample command-line application.
  3 +version: 1.0.0
  4 +# repository: https://github.com/my_org/my_repo
  5 +
  6 +environment:
  7 + sdk: ^3.4.0
  8 +
  9 +# Add regular dependencies here.
  10 +dependencies:
  11 + sherpa_onnx:
  12 + path: ../../sherpa-onnx/flutter
  13 + path: ^1.9.0
  14 + args: ^2.5.0
  15 +
  16 +dev_dependencies:
  17 + lints: ^3.0.0
@@ -8,6 +8,7 @@ export 'src/offline_stream.dart'; @@ -8,6 +8,7 @@ export 'src/offline_stream.dart';
8 export 'src/online_recognizer.dart'; 8 export 'src/online_recognizer.dart';
9 export 'src/online_stream.dart'; 9 export 'src/online_stream.dart';
10 export 'src/speaker_identification.dart'; 10 export 'src/speaker_identification.dart';
  11 +export 'src/tts.dart';
11 export 'src/vad.dart'; 12 export 'src/vad.dart';
12 export 'src/wave_reader.dart'; 13 export 'src/wave_reader.dart';
13 export 'src/wave_writer.dart'; 14 export 'src/wave_writer.dart';
@@ -2,6 +2,55 @@ @@ -2,6 +2,55 @@
2 import 'dart:ffi'; 2 import 'dart:ffi';
3 import 'package:ffi/ffi.dart'; 3 import 'package:ffi/ffi.dart';
4 4
/// FFI struct carrying the VITS model settings handed to the native library.
///
/// String settings are native UTF-8 pointers and the three scale values are
/// 32-bit floats.
///
/// NOTE(review): the field order presumably has to match the C struct of the
/// same name exactly — confirm against the C API header before reordering.
final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct {
  external Pointer<Utf8> model;
  external Pointer<Utf8> lexicon;
  external Pointer<Utf8> tokens;
  external Pointer<Utf8> dataDir;

  @Float()
  external double noiseScale;

  @Float()
  external double noiseScaleW;

  @Float()
  external double lengthScale;

  external Pointer<Utf8> dictDir;
}
  22 +
/// FFI struct combining the VITS settings with the thread count, debug flag
/// and provider name.
///
/// [vits] is embedded by value; [debug] is stored as a 32-bit integer.
///
/// NOTE(review): the field order presumably has to match the C struct of the
/// same name exactly — confirm against the C API header before reordering.
final class SherpaOnnxOfflineTtsModelConfig extends Struct {
  external SherpaOnnxOfflineTtsVitsModelConfig vits;
  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}
  33 +
/// Top-level FFI configuration struct for the offline TTS engine.
///
/// [model] is embedded by value; [ruleFsts] and [ruleFars] are native UTF-8
/// pointers.
///
/// NOTE(review): the field order presumably has to match the C struct of the
/// same name exactly — confirm against the C API header before reordering.
final class SherpaOnnxOfflineTtsConfig extends Struct {
  external SherpaOnnxOfflineTtsModelConfig model;
  external Pointer<Utf8> ruleFsts;

  @Int32()
  external int maxNumSenetences;

  external Pointer<Utf8> ruleFars;
}
  43 +
/// FFI struct describing audio produced by the native library.
///
/// [samples] points at [n] 32-bit floats and [sampleRate] is the sample rate
/// reported for them.
final class SherpaOnnxGeneratedAudio extends Struct {
  external Pointer<Float> samples;

  @Int32()
  external int n;

  @Int32()
  external int sampleRate;
}
  53 +
5 final class SherpaOnnxFeatureConfig extends Struct { 54 final class SherpaOnnxFeatureConfig extends Struct {
6 @Int32() 55 @Int32()
7 external int sampleRate; 56 external int sampleRate;
@@ -218,6 +267,8 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { @@ -218,6 +267,8 @@ final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
218 external Pointer<Utf8> provider; 267 external Pointer<Utf8> provider;
219 } 268 }
220 269
/// Opaque native type for an offline TTS object; only used behind a [Pointer].
final class SherpaOnnxOfflineTts extends Opaque {}
  271 +
221 final class SherpaOnnxCircularBuffer extends Opaque {} 272 final class SherpaOnnxCircularBuffer extends Opaque {}
222 273
223 final class SherpaOnnxVoiceActivityDetector extends Opaque {} 274 final class SherpaOnnxVoiceActivityDetector extends Opaque {}
@@ -234,6 +285,60 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} @@ -234,6 +285,60 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
234 285
235 final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} 286 final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
236 287
// FFI signatures for the offline TTS functions. Each native function has a
// `...Native` typedef written with dart:ffi types and a Dart-side typedef
// written with the corresponding Dart types.

/// Creates an offline TTS object from a pointer to its configuration.
typedef SherpaOnnxCreateOfflineTtsNative = Pointer<SherpaOnnxOfflineTts>
    Function(Pointer<SherpaOnnxOfflineTtsConfig>);

// Only pointer types are involved, so the Dart signature equals the native one.
typedef SherpaOnnxCreateOfflineTts = SherpaOnnxCreateOfflineTtsNative;

/// Destroys an offline TTS object.
typedef SherpaOnnxDestroyOfflineTtsNative = Void Function(
    Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxDestroyOfflineTts = void Function(
    Pointer<SherpaOnnxOfflineTts>);

/// Returns the sample rate of an offline TTS object as a 32-bit integer.
typedef SherpaOnnxOfflineTtsSampleRateNative = Int32 Function(
    Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsSampleRate = int Function(
    Pointer<SherpaOnnxOfflineTts>);

/// Returns the number of speakers of an offline TTS object as a 32-bit
/// integer.
typedef SherpaOnnxOfflineTtsNumSpeakersNative = Int32 Function(
    Pointer<SherpaOnnxOfflineTts>);

typedef SherpaOnnxOfflineTtsNumSpeakers = int Function(
    Pointer<SherpaOnnxOfflineTts>);

/// Generates audio; the arguments are the TTS object, the UTF-8 text, the
/// speaker ID and the speed.
typedef SherpaOnnxOfflineTtsGenerateNative = Pointer<SherpaOnnxGeneratedAudio>
    Function(Pointer<SherpaOnnxOfflineTts>, Pointer<Utf8>, Int32, Float);

typedef SherpaOnnxOfflineTtsGenerate = Pointer<SherpaOnnxGeneratedAudio>
    Function(Pointer<SherpaOnnxOfflineTts>, Pointer<Utf8>, int, double);

/// Destroys a generated-audio struct returned by one of the generate
/// functions.
typedef SherpaOnnxDestroyOfflineTtsGeneratedAudioNative = Void Function(
    Pointer<SherpaOnnxGeneratedAudio>);

typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio = void Function(
    Pointer<SherpaOnnxGeneratedAudio>);

/// Callback invoked by the native side with a pointer to samples and their
/// count.
typedef SherpaOnnxGeneratedAudioCallbackNative = Void Function(
    Pointer<Float>, Int32);

/// Same as the plain generate function, with a trailing callback pointer.
typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative
    = Pointer<SherpaOnnxGeneratedAudio> Function(
        Pointer<SherpaOnnxOfflineTts>,
        Pointer<Utf8>,
        Int32,
        Float,
        Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>);

typedef SherpaOnnxOfflineTtsGenerateWithCallback
    = Pointer<SherpaOnnxGeneratedAudio> Function(
        Pointer<SherpaOnnxOfflineTts>,
        Pointer<Utf8>,
        int,
        double,
        Pointer<NativeFunction<SherpaOnnxGeneratedAudioCallbackNative>>);
  341 +
237 typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer> 342 typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer>
238 Function(Pointer<SherpaOnnxOfflineRecognizerConfig>); 343 Function(Pointer<SherpaOnnxOfflineRecognizerConfig>);
239 344
@@ -608,6 +713,16 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); @@ -608,6 +713,16 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
608 typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); 713 typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
609 714
610 class SherpaOnnxBindings { 715 class SherpaOnnxBindings {
  716 + static SherpaOnnxCreateOfflineTts? createOfflineTts;
  717 + static SherpaOnnxDestroyOfflineTts? destroyOfflineTts;
  718 + static SherpaOnnxOfflineTtsSampleRate? offlineTtsSampleRate;
  719 + static SherpaOnnxOfflineTtsNumSpeakers? offlineTtsNumSpeakers;
  720 + static SherpaOnnxOfflineTtsGenerate? offlineTtsGenerate;
  721 + static SherpaOnnxDestroyOfflineTtsGeneratedAudio?
  722 + destroyOfflineTtsGeneratedAudio;
  723 + static SherpaOnnxOfflineTtsGenerateWithCallback?
  724 + offlineTtsGenerateWithCallback;
  725 +
611 static CreateOfflineRecognizer? createOfflineRecognizer; 726 static CreateOfflineRecognizer? createOfflineRecognizer;
612 static DestroyOfflineRecognizer? destroyOfflineRecognizer; 727 static DestroyOfflineRecognizer? destroyOfflineRecognizer;
613 static CreateOfflineStream? createOfflineStream; 728 static CreateOfflineStream? createOfflineStream;
@@ -740,6 +855,43 @@ class SherpaOnnxBindings { @@ -740,6 +855,43 @@ class SherpaOnnxBindings {
740 static SherpaOnnxFreeWave? freeWave; 855 static SherpaOnnxFreeWave? freeWave;
741 856
742 static void init(DynamicLibrary dynamicLibrary) { 857 static void init(DynamicLibrary dynamicLibrary) {
  858 + createOfflineTts ??= dynamicLibrary
  859 + .lookup<NativeFunction<SherpaOnnxCreateOfflineTtsNative>>(
  860 + 'SherpaOnnxCreateOfflineTts')
  861 + .asFunction();
  862 +
  863 + destroyOfflineTts ??= dynamicLibrary
  864 + .lookup<NativeFunction<SherpaOnnxDestroyOfflineTtsNative>>(
  865 + 'SherpaOnnxDestroyOfflineTts')
  866 + .asFunction();
  867 +
  868 + offlineTtsSampleRate ??= dynamicLibrary
  869 + .lookup<NativeFunction<SherpaOnnxOfflineTtsSampleRateNative>>(
  870 + 'SherpaOnnxOfflineTtsSampleRate')
  871 + .asFunction();
  872 +
  873 + offlineTtsNumSpeakers ??= dynamicLibrary
  874 + .lookup<NativeFunction<SherpaOnnxOfflineTtsNumSpeakersNative>>(
  875 + 'SherpaOnnxOfflineTtsNumSpeakers')
  876 + .asFunction();
  877 +
  878 + offlineTtsGenerate ??= dynamicLibrary
  879 + .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateNative>>(
  880 + 'SherpaOnnxOfflineTtsGenerate')
  881 + .asFunction();
  882 +
  883 + destroyOfflineTtsGeneratedAudio ??= dynamicLibrary
  884 + .lookup<
  885 + NativeFunction<
  886 + SherpaOnnxDestroyOfflineTtsGeneratedAudioNative>>(
  887 + 'SherpaOnnxDestroyOfflineTtsGeneratedAudio')
  888 + .asFunction();
  889 +
  890 + offlineTtsGenerateWithCallback ??= dynamicLibrary
  891 + .lookup<NativeFunction<SherpaOnnxOfflineTtsGenerateWithCallbackNative>>(
  892 + 'SherpaOnnxOfflineTtsGenerateWithCallback')
  893 + .asFunction();
  894 +
743 createOfflineRecognizer ??= dynamicLibrary 895 createOfflineRecognizer ??= dynamicLibrary
744 .lookup<NativeFunction<CreateOfflineRecognizerNative>>( 896 .lookup<NativeFunction<CreateOfflineRecognizerNative>>(
745 'CreateOfflineRecognizer') 897 'CreateOfflineRecognizer')
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:convert';
  3 +import 'dart:ffi';
  4 +import 'dart:typed_data';
  5 +
  6 +import 'package:ffi/ffi.dart';
  7 +
  8 +import './sherpa_onnx_bindings.dart';
  9 +
/// Immutable settings for a VITS text-to-speech model.
///
/// Only [model] and [tokens] are required; every other setting has a default.
class OfflineTtsVitsModelConfig {
  /// Path to the ONNX model.
  final String model;

  /// Path to the lexicon file; empty by default.
  final String lexicon;

  /// Path to the tokens file.
  final String tokens;

  /// Path to the espeak-ng data directory; empty by default.
  final String dataDir;

  /// Noise scale; defaults to 0.667.
  final double noiseScale;

  /// Second noise scale; defaults to 0.8.
  final double noiseScaleW;

  /// Length scale; defaults to 1.0.
  final double lengthScale;

  /// Path to the jieba dictionary directory; empty by default.
  final String dictDir;

  const OfflineTtsVitsModelConfig({
    required this.model,
    this.lexicon = '',
    required this.tokens,
    this.dataDir = '',
    this.noiseScale = 0.667,
    this.noiseScaleW = 0.8,
    this.lengthScale = 1.0,
    this.dictDir = '',
  });

  /// Returns a single-line description listing every field.
  @override
  String toString() => 'OfflineTtsVitsModelConfig('
      'model: $model, '
      'lexicon: $lexicon, '
      'tokens: $tokens, '
      'dataDir: $dataDir, '
      'noiseScale: $noiseScale, '
      'noiseScaleW: $noiseScaleW, '
      'lengthScale: $lengthScale, '
      'dictDir: $dictDir)';
}
  36 +
/// Immutable model-level settings for offline text to speech.
///
/// Wraps the VITS settings together with the thread count, the debug flag
/// and the provider name.
class OfflineTtsModelConfig {
  /// Settings of the VITS model to use.
  final OfflineTtsVitsModelConfig vits;

  /// Number of threads; defaults to 1.
  final int numThreads;

  /// Whether debug mode is requested; defaults to `true`.
  final bool debug;

  /// Provider name; defaults to `'cpu'`.
  final String provider;

  const OfflineTtsModelConfig({
    required this.vits,
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  /// Returns a single-line description listing every field.
  @override
  String toString() => 'OfflineTtsModelConfig('
      'vits: $vits, '
      'numThreads: $numThreads, '
      'debug: $debug, '
      'provider: $provider)';
}
  55 +
/// Immutable top-level configuration used to create an offline TTS engine.
class OfflineTtsConfig {
  /// Model settings.
  final OfflineTtsModelConfig model;

  /// Rule FST path(s); empty by default.
  final String ruleFsts;

  /// Value forwarded as the native `maxNumSenetences` setting; defaults to 1.
  final int maxNumSenetences;

  /// Rule FAR path(s); empty by default.
  final String ruleFars;

  const OfflineTtsConfig({
    required this.model,
    this.ruleFsts = '',
    this.maxNumSenetences = 1,
    this.ruleFars = '',
  });

  /// Returns a single-line description listing every field.
  @override
  String toString() => 'OfflineTtsConfig('
      'model: $model, '
      'ruleFsts: $ruleFsts, '
      'maxNumSenetences: $maxNumSenetences, '
      'ruleFars: $ruleFars)';
}
  74 +
/// Audio produced by a text-to-speech call.
class GeneratedAudio {
  /// The generated samples.
  final Float32List samples;

  /// Sample rate of [samples].
  final int sampleRate;

  GeneratedAudio({
    required this.samples,
    required this.sampleRate,
  });
}
  84 +
/// Dart wrapper around a native offline text-to-speech object.
///
/// The native handle is kept in [ptr]. It is `nullptr` when creation failed
/// or after [free] has been called; in that state the generate methods
/// return empty audio and the getters return 0 instead of calling into
/// native code.
class OfflineTts {
  OfflineTts._({required this.ptr, required this.config});

  /// The user is responsible to call the OfflineTts.free()
  /// method of the returned instance to avoid memory leak.
  factory OfflineTts(OfflineTtsConfig config) {
    final c = calloc<SherpaOnnxOfflineTtsConfig>();
    c.ref.model.vits.model = config.model.vits.model.toNativeUtf8();
    c.ref.model.vits.lexicon = config.model.vits.lexicon.toNativeUtf8();
    c.ref.model.vits.tokens = config.model.vits.tokens.toNativeUtf8();
    c.ref.model.vits.dataDir = config.model.vits.dataDir.toNativeUtf8();
    c.ref.model.vits.noiseScale = config.model.vits.noiseScale;
    c.ref.model.vits.noiseScaleW = config.model.vits.noiseScaleW;
    c.ref.model.vits.lengthScale = config.model.vits.lengthScale;
    c.ref.model.vits.dictDir = config.model.vits.dictDir.toNativeUtf8();

    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.provider = config.model.provider.toNativeUtf8();

    c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
    c.ref.maxNumSenetences = config.maxNumSenetences;
    c.ref.ruleFars = config.ruleFars.toNativeUtf8();

    final ptr = SherpaOnnxBindings.createOfflineTts?.call(c) ?? nullptr;

    // Release every native string first, then the struct that holds them.
    calloc.free(c.ref.ruleFars);
    calloc.free(c.ref.ruleFsts);
    calloc.free(c.ref.model.provider);
    calloc.free(c.ref.model.vits.dictDir);
    calloc.free(c.ref.model.vits.dataDir);
    calloc.free(c.ref.model.vits.tokens);
    calloc.free(c.ref.model.vits.lexicon);
    calloc.free(c.ref.model.vits.model);
    calloc.free(c);

    return OfflineTts._(ptr: ptr, config: config);
  }

  /// Destroys the native object and resets [ptr] to `nullptr`.
  ///
  /// Calling it again, or on an instance whose creation failed, does nothing.
  void free() {
    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOfflineTts?.call(ptr);
    ptr = nullptr;
  }

  /// Generates audio for [text] with speaker [sid] at the given [speed].
  ///
  /// Returns empty samples with a sample rate of 0 when the native handle is
  /// missing or the native call returns no result.
  GeneratedAudio generate(
      {required String text, int sid = 0, double speed = 1.0}) {
    if (ptr == nullptr) {
      return _emptyAudio();
    }

    final textPtr = text.toNativeUtf8();
    try {
      final p = SherpaOnnxBindings.offlineTtsGenerate
              ?.call(ptr, textPtr, sid, speed) ??
          nullptr;
      return _takeAudio(p);
    } finally {
      calloc.free(textPtr);
    }
  }

  /// Like [generate], but also passes each chunk of samples handed over by
  /// the native side to [callback].
  ///
  /// [callback] receives a Dart-owned copy of the samples. The return value
  /// follows the same rules as [generate].
  GeneratedAudio generateWithCallback(
      {required String text,
      int sid = 0,
      double speed = 1.0,
      required void Function(Float32List samples) callback}) {
    if (ptr == nullptr) {
      return _emptyAudio();
    }

    // see
    // https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
    // https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
    // https://github.com/dart-lang/sdk/blob/main/tests/ffi/isolate_local_function_callbacks_test.dart#L46
    final wrapper =
        NativeCallable<SherpaOnnxGeneratedAudioCallbackNative>.isolateLocal(
            (Pointer<Float> samples, int n) {
      // Copy before handing over: the native buffer is not owned by Dart.
      callback(Float32List.fromList(samples.asTypedList(n)));
    });

    final textPtr = text.toNativeUtf8();
    try {
      final p = SherpaOnnxBindings.offlineTtsGenerateWithCallback
              ?.call(ptr, textPtr, sid, speed, wrapper.nativeFunction) ??
          nullptr;
      return _takeAudio(p);
    } finally {
      calloc.free(textPtr);
      wrapper.close();
    }
  }

  /// Sample rate reported by the native object, or 0 when unavailable.
  int get sampleRate => ptr == nullptr
      ? 0
      : SherpaOnnxBindings.offlineTtsSampleRate?.call(ptr) ?? 0;

  /// Number of speakers reported by the native object, or 0 when unavailable.
  int get numSpeakers => ptr == nullptr
      ? 0
      : SherpaOnnxBindings.offlineTtsNumSpeakers?.call(ptr) ?? 0;

  /// Result used whenever no audio could be produced.
  static GeneratedAudio _emptyAudio() =>
      GeneratedAudio(samples: Float32List(0), sampleRate: 0);

  /// Copies the native audio at [p] into Dart memory and destroys [p].
  static GeneratedAudio _takeAudio(Pointer<SherpaOnnxGeneratedAudio> p) {
    if (p == nullptr) {
      return _emptyAudio();
    }

    final sampleRate = p.ref.sampleRate;
    // fromList copies, so the data stays valid after the native struct is
    // destroyed below.
    final samples = Float32List.fromList(p.ref.samples.asTypedList(p.ref.n));

    SherpaOnnxBindings.destroyOfflineTtsGeneratedAudio?.call(p);

    return GeneratedAudio(samples: samples, sampleRate: sampleRate);
  }

  /// Native handle; `nullptr` when creation failed or after [free].
  Pointer<SherpaOnnxOfflineTts> ptr;

  /// Configuration this instance was created with.
  OfflineTtsConfig config;
}