Aero
Committed by GitHub

Add isolate_tts demo (#1529)

  import 'dart:async';
  import 'dart:io';
  import 'dart:isolate';

  import 'package:flutter/material.dart';
  import 'package:flutter/services.dart';
  import 'package:media_kit/media_kit.dart';
  import 'package:path/path.dart' as p;
  import 'package:path_provider/path_provider.dart';
  import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

  import 'utils.dart';
  12 +
  13 +class _IsolateTask<T> {
  14 + final SendPort sendPort;
  15 +
  16 + RootIsolateToken? rootIsolateToken;
  17 +
  18 + _IsolateTask(this.sendPort, this.rootIsolateToken);
  19 +}
  20 +
  21 +class _PortModel {
  22 + final String method;
  23 +
  24 + final SendPort? sendPort;
  25 + dynamic data;
  26 +
  27 + _PortModel({
  28 + required this.method,
  29 + this.sendPort,
  30 + this.data,
  31 + });
  32 +}
  33 +
  34 +class _TtsManager {
  35 + /// 主进程通信端口
  36 + final ReceivePort receivePort;
  37 +
  38 + final Isolate isolate;
  39 +
  40 + final SendPort isolatePort;
  41 +
  42 + _TtsManager({
  43 + required this.receivePort,
  44 + required this.isolate,
  45 + required this.isolatePort,
  46 + });
  47 +}
  48 +
  49 +class IsolateTts {
  50 + static late final _TtsManager _ttsManager;
  51 +
  52 + /// 获取线程里的通信端口
  53 + static SendPort get _sendPort => _ttsManager.isolatePort;
  54 +
  55 + static late sherpa_onnx.OfflineTts _tts;
  56 +
  57 + static late Player _player;
  58 +
  59 + static Future<void> init() async {
  60 + ReceivePort port = ReceivePort();
  61 + RootIsolateToken? rootIsolateToken = RootIsolateToken.instance;
  62 +
  63 + Isolate isolate = await Isolate.spawn(
  64 + _isolateEntry,
  65 + _IsolateTask(port.sendPort, rootIsolateToken),
  66 + errorsAreFatal: false,
  67 + );
  68 + port.listen((msg) async {
  69 + if (msg is SendPort) {
  70 + print(11);
  71 + _ttsManager = _TtsManager(receivePort: port, isolate: isolate, isolatePort: msg);
  72 + return;
  73 + }
  74 + });
  75 + }
  76 +
  77 + static Future<void> _isolateEntry(_IsolateTask task) async {
  78 + if (task.rootIsolateToken != null) {
  79 + BackgroundIsolateBinaryMessenger.ensureInitialized(task.rootIsolateToken!);
  80 + }
  81 + MediaKit.ensureInitialized();
  82 + _player = Player();
  83 + sherpa_onnx.initBindings();
  84 + final receivePort = ReceivePort();
  85 + task.sendPort.send(receivePort.sendPort);
  86 +
  87 + String modelDir = '';
  88 + String modelName = '';
  89 + String ruleFsts = '';
  90 + String ruleFars = '';
  91 + String lexicon = '';
  92 + String dataDir = '';
  93 + String dictDir = '';
  94 +
  95 + // Example 7
  96 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  97 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
  98 + modelDir = 'vits-melo-tts-zh_en';
  99 + modelName = 'model.onnx';
  100 + lexicon = 'lexicon.txt';
  101 + dictDir = 'vits-melo-tts-zh_en/dict';
  102 +
  103 + if (modelName == '') {
  104 + throw Exception('You are supposed to select a model by changing the code before you run the app');
  105 + }
  106 +
  107 + final Directory directory = await getApplicationDocumentsDirectory();
  108 + modelName = p.join(directory.path, modelDir, modelName);
  109 +
  110 + if (ruleFsts != '') {
  111 + final all = ruleFsts.split(',');
  112 + var tmp = <String>[];
  113 + for (final f in all) {
  114 + tmp.add(p.join(directory.path, f));
  115 + }
  116 + ruleFsts = tmp.join(',');
  117 + }
  118 +
  119 + if (ruleFars != '') {
  120 + final all = ruleFars.split(',');
  121 + var tmp = <String>[];
  122 + for (final f in all) {
  123 + tmp.add(p.join(directory.path, f));
  124 + }
  125 + ruleFars = tmp.join(',');
  126 + }
  127 +
  128 + if (lexicon != '') {
  129 + lexicon = p.join(directory.path, modelDir, lexicon);
  130 + }
  131 +
  132 + if (dataDir != '') {
  133 + dataDir = p.join(directory.path, dataDir);
  134 + }
  135 +
  136 + if (dictDir != '') {
  137 + dictDir = p.join(directory.path, dictDir);
  138 + }
  139 +
  140 + final tokens = p.join(directory.path, modelDir, 'tokens.txt');
  141 +
  142 + final vits = sherpa_onnx.OfflineTtsVitsModelConfig(
  143 + model: modelName,
  144 + lexicon: lexicon,
  145 + tokens: tokens,
  146 + dataDir: dataDir,
  147 + dictDir: dictDir,
  148 + );
  149 +
  150 + final modelConfig = sherpa_onnx.OfflineTtsModelConfig(
  151 + vits: vits,
  152 + numThreads: 2,
  153 + debug: true,
  154 + provider: 'cpu',
  155 + );
  156 +
  157 + final config = sherpa_onnx.OfflineTtsConfig(
  158 + model: modelConfig,
  159 + ruleFsts: ruleFsts,
  160 + ruleFars: ruleFars,
  161 + maxNumSenetences: 1,
  162 + );
  163 + // print(config);
  164 + receivePort.listen((msg) async {
  165 + print(msg);
  166 + if (msg is _PortModel) {
  167 + switch (msg.method) {
  168 + case 'generate':
  169 + {
  170 + _PortModel _v = msg;
  171 + final stopwatch = Stopwatch();
  172 + stopwatch.start();
  173 + final audio = _tts.generate(text: _v.data['text'], sid: _v.data['sid'], speed: _v.data['speed']);
  174 + final suffix = '-sid-${_v.data['sid']}-speed-${_v.data['sid'].toStringAsPrecision(2)}';
  175 + final filename = await generateWaveFilename(suffix);
  176 +
  177 + final ok = sherpa_onnx.writeWave(
  178 + filename: filename,
  179 + samples: audio.samples,
  180 + sampleRate: audio.sampleRate,
  181 + );
  182 +
  183 + if (ok) {
  184 + stopwatch.stop();
  185 + double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();
  186 +
  187 + double waveDuration = audio.samples.length.toDouble() / audio.sampleRate.toDouble();
  188 +
  189 + print('Saved to\n$filename\n'
  190 + 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
  191 + 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
  192 + 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
  193 + '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ');
  194 +
  195 + await _player.open(Media('file:///$filename'));
  196 + await _player.play();
  197 + }
  198 + }
  199 + break;
  200 + }
  201 + }
  202 + });
  203 + _tts = sherpa_onnx.OfflineTts(config);
  204 + }
  205 +
  206 + static Future<void> generate({required String text, int sid = 0, double speed = 1.0}) async {
  207 + ReceivePort receivePort = ReceivePort();
  208 + _sendPort.send(_PortModel(
  209 + method: 'generate',
  210 + data: {'text': text, 'sid': sid, 'speed': speed},
  211 + sendPort: receivePort.sendPort,
  212 + ));
  213 + await receivePort.first;
  214 + receivePort.close();
  215 + }
  216 +}
  217 +
  218 +/// 这里是页面
  219 +class IsolateTtsView extends StatefulWidget {
  220 + const IsolateTtsView({super.key});
  221 +
  222 + @override
  223 + State<IsolateTtsView> createState() => _IsolateTtsViewState();
  224 +}
  225 +
  226 +class _IsolateTtsViewState extends State<IsolateTtsView> {
  227 + @override
  228 + void initState() {
  229 + super.initState();
  230 + IsolateTts.init();
  231 + }
  232 +
  233 + @override
  234 + Widget build(BuildContext context) {
  235 + return Scaffold(
  236 + body: Center(
  237 + child: ElevatedButton(
  238 + onPressed: () {
  239 + IsolateTts.generate(text: '这是已退出的 isolate TTS');
  240 + },
  241 + child: Text('Isolate TTS'),
  242 + ),
  243 + ),
  244 + );
  245 + }
  246 +}
1 // Copyright (c) 2024 Xiaomi Corporation 1 // Copyright (c) 2024 Xiaomi Corporation
2 import 'package:flutter/material.dart'; 2 import 'package:flutter/material.dart';
3 3
4 -import './tts.dart';  
5 import './info.dart'; 4 import './info.dart';
  5 +import './tts.dart';
  6 +import 'isolate_tts.dart';
6 7
7 void main() { 8 void main() {
8 runApp(const MyApp()); 9 runApp(const MyApp());
@@ -38,6 +39,7 @@ class _MyHomePageState extends State<MyHomePage> { @@ -38,6 +39,7 @@ class _MyHomePageState extends State<MyHomePage> {
38 final List<Widget> _tabs = [ 39 final List<Widget> _tabs = [
39 TtsScreen(), 40 TtsScreen(),
40 InfoScreen(), 41 InfoScreen(),
  42 + IsolateTtsView(),
41 ]; 43 ];
42 @override 44 @override
43 Widget build(BuildContext context) { 45 Widget build(BuildContext context) {
@@ -62,6 +64,10 @@ class _MyHomePageState extends State<MyHomePage> { @@ -62,6 +64,10 @@ class _MyHomePageState extends State<MyHomePage> {
62 icon: Icon(Icons.info), 64 icon: Icon(Icons.info),
63 label: 'Info', 65 label: 'Info',
64 ), 66 ),
  67 + BottomNavigationBarItem(
  68 + icon: Icon(Icons.multiline_chart),
  69 + label: 'isolate',
  70 + ),
65 ], 71 ],
66 ), 72 ),
67 ); 73 );
@@ -79,17 +79,16 @@ Future<sherpa_onnx.OfflineTts> createOfflineTts() async { @@ -79,17 +79,16 @@ Future<sherpa_onnx.OfflineTts> createOfflineTts() async {
79 // Example 7 79 // Example 7
80 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models 80 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
81 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2 81 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
82 - // modelDir = 'vits-melo-tts-zh_en';  
83 - // modelName = 'model.onnx';  
84 - // lexicon = 'lexicon.txt';  
85 - // dictDir = 'vits-melo-tts-zh_en/dict'; 82 + modelDir = 'vits-melo-tts-zh_en';
  83 + modelName = 'model.onnx';
  84 + lexicon = 'lexicon.txt';
  85 + dictDir = 'vits-melo-tts-zh_en/dict';
86 86
87 // ============================================================ 87 // ============================================================
88 // Please don't change the remaining part of this function 88 // Please don't change the remaining part of this function
89 // ============================================================ 89 // ============================================================
90 if (modelName == '') { 90 if (modelName == '') {
91 - throw Exception(  
92 - 'You are supposed to select a model by changing the code before you run the app'); 91 + throw Exception('You are supposed to select a model by changing the code before you run the app');
93 } 92 }
94 93
95 final Directory directory = await getApplicationDocumentsDirectory(); 94 final Directory directory = await getApplicationDocumentsDirectory();
@@ -77,9 +77,7 @@ class _TtsScreenState extends State<TtsScreen> { @@ -77,9 +77,7 @@ class _TtsScreenState extends State<TtsScreen> {
77 onTapOutside: (PointerDownEvent event) { 77 onTapOutside: (PointerDownEvent event) {
78 FocusManager.instance.primaryFocus?.unfocus(); 78 FocusManager.instance.primaryFocus?.unfocus();
79 }, 79 },
80 - inputFormatters: <TextInputFormatter>[  
81 - FilteringTextInputFormatter.digitsOnly  
82 - ]), 80 + inputFormatters: <TextInputFormatter>[FilteringTextInputFormatter.digitsOnly]),
83 Slider( 81 Slider(
84 // decoration: InputDecoration( 82 // decoration: InputDecoration(
85 // labelText: "speech speed", 83 // labelText: "speech speed",
@@ -108,125 +106,117 @@ class _TtsScreenState extends State<TtsScreen> { @@ -108,125 +106,117 @@ class _TtsScreenState extends State<TtsScreen> {
108 }, 106 },
109 ), 107 ),
110 const SizedBox(height: 5), 108 const SizedBox(height: 5),
111 - Row(  
112 - mainAxisAlignment: MainAxisAlignment.center,  
113 - children: <Widget>[  
114 - OutlinedButton(  
115 - child: Text("Generate"),  
116 - onPressed: () async {  
117 - await _init();  
118 - await _player?.stop();  
119 -  
120 - setState(() {  
121 - _maxSpeakerID = _tts?.numSpeakers ?? 0;  
122 - if (_maxSpeakerID > 0) {  
123 - _maxSpeakerID -= 1;  
124 - }  
125 - });  
126 -  
127 - if (_tts == null) {  
128 - _controller_hint.value = TextEditingValue(  
129 - text: 'Failed to initialize tts',  
130 - );  
131 - return;  
132 - }  
133 -  
134 - _controller_hint.value = TextEditingValue(  
135 - text: '',  
136 - );  
137 -  
138 - final text = _controller_text_input.text.trim();  
139 - if (text == '') {  
140 - _controller_hint.value = TextEditingValue(  
141 - text: 'Please first input your text to generate',  
142 - );  
143 - return;  
144 - }  
145 -  
146 - final sid =  
147 - int.tryParse(_controller_sid.text.trim()) ?? 0;  
148 -  
149 - final stopwatch = Stopwatch();  
150 - stopwatch.start();  
151 - final audio =  
152 - _tts!.generate(text: text, sid: sid, speed: _speed);  
153 - final suffix =  
154 - '-sid-$sid-speed-${_speed.toStringAsPrecision(2)}';  
155 - final filename = await generateWaveFilename(suffix);  
156 -  
157 - final ok = sherpa_onnx.writeWave(  
158 - filename: filename,  
159 - samples: audio.samples,  
160 - sampleRate: audio.sampleRate,  
161 - );  
162 -  
163 - if (ok) {  
164 - stopwatch.stop();  
165 - double elapsed =  
166 - stopwatch.elapsed.inMilliseconds.toDouble();  
167 -  
168 - double waveDuration =  
169 - audio.samples.length.toDouble() /  
170 - audio.sampleRate.toDouble();  
171 -  
172 - _controller_hint.value = TextEditingValue(  
173 - text: 'Saved to\n$filename\n'  
174 - 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'  
175 - 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'  
176 - 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '  
177 - '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ',  
178 - );  
179 - _lastFilename = filename;  
180 -  
181 - await _player?.play(DeviceFileSource(_lastFilename));  
182 - } else {  
183 - _controller_hint.value = TextEditingValue(  
184 - text: 'Failed to save generated audio',  
185 - );  
186 - }  
187 - },  
188 - ),  
189 - const SizedBox(width: 5),  
190 - OutlinedButton(  
191 - child: Text("Clear"),  
192 - onPressed: () {  
193 - _controller_text_input.value = TextEditingValue(  
194 - text: '',  
195 - );  
196 -  
197 - _controller_hint.value = TextEditingValue(  
198 - text: '',  
199 - );  
200 - },  
201 - ),  
202 - const SizedBox(width: 5),  
203 - OutlinedButton(  
204 - child: Text("Play"),  
205 - onPressed: () async {  
206 - if (_lastFilename == '') {  
207 - _controller_hint.value = TextEditingValue(  
208 - text: 'No generated wave file found',  
209 - );  
210 - return;  
211 - }  
212 - await _player?.stop();  
213 - await _player?.play(DeviceFileSource(_lastFilename));  
214 - _controller_hint.value = TextEditingValue(  
215 - text: 'Playing\n$_lastFilename',  
216 - );  
217 - },  
218 - ),  
219 - const SizedBox(width: 5),  
220 - OutlinedButton(  
221 - child: Text("Stop"),  
222 - onPressed: () async {  
223 - await _player?.stop();  
224 - _controller_hint.value = TextEditingValue(  
225 - text: '',  
226 - );  
227 - },  
228 - ),  
229 - ]), 109 + Row(mainAxisAlignment: MainAxisAlignment.center, children: <Widget>[
  110 + OutlinedButton(
  111 + child: Text("Generate"),
  112 + onPressed: () async {
  113 + await _init();
  114 + await _player?.stop();
  115 +
  116 + setState(() {
  117 + _maxSpeakerID = _tts?.numSpeakers ?? 0;
  118 + if (_maxSpeakerID > 0) {
  119 + _maxSpeakerID -= 1;
  120 + }
  121 + });
  122 +
  123 + if (_tts == null) {
  124 + _controller_hint.value = TextEditingValue(
  125 + text: 'Failed to initialize tts',
  126 + );
  127 + return;
  128 + }
  129 +
  130 + _controller_hint.value = TextEditingValue(
  131 + text: '',
  132 + );
  133 +
  134 + final text = _controller_text_input.text.trim();
  135 + if (text == '') {
  136 + _controller_hint.value = TextEditingValue(
  137 + text: 'Please first input your text to generate',
  138 + );
  139 + return;
  140 + }
  141 +
  142 + final sid = int.tryParse(_controller_sid.text.trim()) ?? 0;
  143 +
  144 + final stopwatch = Stopwatch();
  145 + stopwatch.start();
  146 + final audio = _tts!.generate(text: text, sid: sid, speed: _speed);
  147 + final suffix = '-sid-$sid-speed-${_speed.toStringAsPrecision(2)}';
  148 + final filename = await generateWaveFilename(suffix);
  149 +
  150 + final ok = sherpa_onnx.writeWave(
  151 + filename: filename,
  152 + samples: audio.samples,
  153 + sampleRate: audio.sampleRate,
  154 + );
  155 +
  156 + if (ok) {
  157 + stopwatch.stop();
  158 + double elapsed = stopwatch.elapsed.inMilliseconds.toDouble();
  159 +
  160 + double waveDuration = audio.samples.length.toDouble() / audio.sampleRate.toDouble();
  161 +
  162 + _controller_hint.value = TextEditingValue(
  163 + text: 'Saved to\n$filename\n'
  164 + 'Elapsed: ${(elapsed / 1000).toStringAsPrecision(4)} s\n'
  165 + 'Wave duration: ${waveDuration.toStringAsPrecision(4)} s\n'
  166 + 'RTF: ${(elapsed / 1000).toStringAsPrecision(4)}/${waveDuration.toStringAsPrecision(4)} '
  167 + '= ${(elapsed / 1000 / waveDuration).toStringAsPrecision(3)} ',
  168 + );
  169 + _lastFilename = filename;
  170 +
  171 + await _player?.play(DeviceFileSource(_lastFilename));
  172 + } else {
  173 + _controller_hint.value = TextEditingValue(
  174 + text: 'Failed to save generated audio',
  175 + );
  176 + }
  177 + },
  178 + ),
  179 + const SizedBox(width: 5),
  180 + OutlinedButton(
  181 + child: Text("Clear"),
  182 + onPressed: () {
  183 + _controller_text_input.value = TextEditingValue(
  184 + text: '',
  185 + );
  186 +
  187 + _controller_hint.value = TextEditingValue(
  188 + text: '',
  189 + );
  190 + },
  191 + ),
  192 + const SizedBox(width: 5),
  193 + OutlinedButton(
  194 + child: Text("Play"),
  195 + onPressed: () async {
  196 + if (_lastFilename == '') {
  197 + _controller_hint.value = TextEditingValue(
  198 + text: 'No generated wave file found',
  199 + );
  200 + return;
  201 + }
  202 + await _player?.stop();
  203 + await _player?.play(DeviceFileSource(_lastFilename));
  204 + _controller_hint.value = TextEditingValue(
  205 + text: 'Playing\n$_lastFilename',
  206 + );
  207 + },
  208 + ),
  209 + const SizedBox(width: 5),
  210 + OutlinedButton(
  211 + child: Text("Stop"),
  212 + onPressed: () async {
  213 + await _player?.stop();
  214 + _controller_hint.value = TextEditingValue(
  215 + text: '',
  216 + );
  217 + },
  218 + ),
  219 + ]),
230 const SizedBox(height: 5), 220 const SizedBox(height: 5),
231 TextField( 221 TextField(
232 decoration: InputDecoration( 222 decoration: InputDecoration(
@@ -24,6 +24,12 @@ dependencies: @@ -24,6 +24,12 @@ dependencies:
24 url_launcher: 6.2.6 24 url_launcher: 6.2.6
25 url_launcher_linux: 3.1.0 25 url_launcher_linux: 3.1.0
26 audioplayers: ^5.0.0 26 audioplayers: ^5.0.0
  27 + media_kit:
  28 + media_kit_libs_video:
27 29
28 flutter: 30 flutter:
29 uses-material-design: true 31 uses-material-design: true
  32 +
  33 + assets:
  34 + - assets/vits-melo-tts-zh_en/
  35 + - assets/vits-melo-tts-zh_en/dict/