正在显示
15 个修改的文件
包含
887 行增加
和
16 行删除
| @@ -164,6 +164,19 @@ jobs: | @@ -164,6 +164,19 @@ jobs: | ||
| 164 | cd example/assets | 164 | cd example/assets |
| 165 | 165 | ||
| 166 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | 166 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 167 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 168 | + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 169 | + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 170 | + cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 171 | + rm encoder-epoch-99-avg-1.onnx | ||
| 172 | + rm decoder-epoch-99-avg-1.int8.onnx | ||
| 173 | + rm joiner-epoch-99-avg-1.onnx | ||
| 174 | + rm README.md | ||
| 175 | + rm bpe.model | ||
| 176 | + rm bpe.vocab | ||
| 177 | + rm -rf test_wavs | ||
| 178 | + ls -lh | ||
| 179 | + cd .. | ||
| 167 | 180 | ||
| 168 | - name: Build flutter | 181 | - name: Build flutter |
| 169 | shell: bash | 182 | shell: bash |
| @@ -132,6 +132,19 @@ jobs: | @@ -132,6 +132,19 @@ jobs: | ||
| 132 | # curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | 132 | # curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx |
| 133 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | 133 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 134 | # git clone https://github.com/csukuangfj/sr-data | 134 | # git clone https://github.com/csukuangfj/sr-data |
| 135 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 136 | + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 137 | + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 138 | + cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 139 | + rm encoder-epoch-99-avg-1.onnx | ||
| 140 | + rm decoder-epoch-99-avg-1.int8.onnx | ||
| 141 | + rm joiner-epoch-99-avg-1.onnx | ||
| 142 | + rm README.md | ||
| 143 | + rm bpe.model | ||
| 144 | + rm bpe.vocab | ||
| 145 | + rm -rf test_wavs | ||
| 146 | + ls -lh | ||
| 147 | + cd .. | ||
| 135 | 148 | ||
| 136 | rm -rf sr-data/.git | 149 | rm -rf sr-data/.git |
| 137 | popd | 150 | popd |
| @@ -27,7 +27,7 @@ on: | @@ -27,7 +27,7 @@ on: | ||
| 27 | workflow_dispatch: | 27 | workflow_dispatch: |
| 28 | 28 | ||
| 29 | concurrency: | 29 | concurrency: |
| 30 | - group: flutter-windows-x64${{ github.ref }} | 30 | + group: flutter-windows-x64-${{ github.ref }} |
| 31 | cancel-in-progress: true | 31 | cancel-in-progress: true |
| 32 | 32 | ||
| 33 | jobs: | 33 | jobs: |
| @@ -115,6 +115,19 @@ jobs: | @@ -115,6 +115,19 @@ jobs: | ||
| 115 | cd example/assets | 115 | cd example/assets |
| 116 | 116 | ||
| 117 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | 117 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
| 118 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 119 | + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 120 | + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 121 | + cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 122 | + rm encoder-epoch-99-avg-1.onnx | ||
| 123 | + rm decoder-epoch-99-avg-1.int8.onnx | ||
| 124 | + rm joiner-epoch-99-avg-1.onnx | ||
| 125 | + rm README.md | ||
| 126 | + rm bpe.model | ||
| 127 | + rm bpe.vocab | ||
| 128 | + rm -rf test_wavs | ||
| 129 | + ls -lh | ||
| 130 | + cd .. | ||
| 118 | 131 | ||
| 119 | - name: Build flutter | 132 | - name: Build flutter |
| 120 | shell: bash | 133 | shell: bash |
| @@ -13,7 +13,7 @@ on: | @@ -13,7 +13,7 @@ on: | ||
| 13 | - cron: "50 23 * * *" | 13 | - cron: "50 23 * * *" |
| 14 | 14 | ||
| 15 | concurrency: | 15 | concurrency: |
| 16 | - group: test-dot-net-nuget | 16 | + group: test-dot-net-nuget-${{ github.ref }} |
| 17 | cancel-in-progress: true | 17 | cancel-in-progress: true |
| 18 | 18 | ||
| 19 | permissions: | 19 | permissions: |
| @@ -26,7 +26,7 @@ on: | @@ -26,7 +26,7 @@ on: | ||
| 26 | workflow_dispatch: | 26 | workflow_dispatch: |
| 27 | 27 | ||
| 28 | concurrency: | 28 | concurrency: |
| 29 | - group: test-dot-net | 29 | + group: test-dot-net-${{ github.ref }} |
| 30 | cancel-in-progress: true | 30 | cancel-in-progress: true |
| 31 | 31 | ||
| 32 | permissions: | 32 | permissions: |
| @@ -61,7 +61,15 @@ jobs: | @@ -61,7 +61,15 @@ jobs: | ||
| 61 | 61 | ||
| 62 | mkdir build | 62 | mkdir build |
| 63 | cd build | 63 | cd build |
| 64 | - cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install -DCMAKE_BUILD_TYPE=Release .. | 64 | + cmake \ |
| 65 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 66 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 67 | + -DCMAKE_BUILD_TYPE=Release \ | ||
| 68 | + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ | ||
| 69 | + -DBUILD_ESPEAK_NG_EXE=OFF \ | ||
| 70 | + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ | ||
| 71 | + .. | ||
| 72 | + | ||
| 65 | cmake --build . --target install --config Release | 73 | cmake --build . --target install --config Release |
| 66 | 74 | ||
| 67 | - name: Build sherpa-onnx for windows x86 | 75 | - name: Build sherpa-onnx for windows x86 |
| @@ -74,7 +82,15 @@ jobs: | @@ -74,7 +82,15 @@ jobs: | ||
| 74 | 82 | ||
| 75 | mkdir build-win32 | 83 | mkdir build-win32 |
| 76 | cd build-win32 | 84 | cd build-win32 |
| 77 | - cmake -A Win32 -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install -DCMAKE_BUILD_TYPE=Release .. | 85 | + cmake \ |
| 86 | + -A Win32 \ | ||
| 87 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 88 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 89 | + -DCMAKE_BUILD_TYPE=Release \ | ||
| 90 | + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ | ||
| 91 | + -DBUILD_ESPEAK_NG_EXE=OFF \ | ||
| 92 | + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ | ||
| 93 | + .. | ||
| 78 | cmake --build . --target install --config Release | 94 | cmake --build . --target install --config Release |
| 79 | 95 | ||
| 80 | - uses: actions/upload-artifact@v4 | 96 | - uses: actions/upload-artifact@v4 |
不能预览此文件类型
sherpa-onnx/flutter/example/assets/vad.ico
0 → 100644
不能预览此文件类型
| @@ -2,9 +2,8 @@ | @@ -2,9 +2,8 @@ | ||
| 2 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 2 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 3 | import 'package:flutter/material.dart'; | 3 | import 'package:flutter/material.dart'; |
| 4 | 4 | ||
| 5 | -import "./speaker_identification_test.dart"; | ||
| 6 | -import "./vad_test.dart"; | ||
| 7 | -import './home.dart'; | 5 | +import './vad.dart'; |
| 6 | +import './streaming_asr.dart'; | ||
| 8 | import './info.dart'; | 7 | import './info.dart'; |
| 9 | 8 | ||
| 10 | void main() { | 9 | void main() { |
| @@ -20,7 +19,7 @@ class MyApp extends StatelessWidget { | @@ -20,7 +19,7 @@ class MyApp extends StatelessWidget { | ||
| 20 | theme: ThemeData( | 19 | theme: ThemeData( |
| 21 | primarySwatch: Colors.blue, | 20 | primarySwatch: Colors.blue, |
| 22 | ), | 21 | ), |
| 23 | - home: const MyHomePage(title: 'Next-gen Kaldi: VAD demo'), | 22 | + home: const MyHomePage(title: 'Next-gen Kaldi Demo'), |
| 24 | ); | 23 | ); |
| 25 | } | 24 | } |
| 26 | } | 25 | } |
| @@ -35,7 +34,8 @@ class MyHomePage extends StatefulWidget { | @@ -35,7 +34,8 @@ class MyHomePage extends StatefulWidget { | ||
| 35 | class _MyHomePageState extends State<MyHomePage> { | 34 | class _MyHomePageState extends State<MyHomePage> { |
| 36 | int _currentIndex = 0; | 35 | int _currentIndex = 0; |
| 37 | final List<Widget> _tabs = [ | 36 | final List<Widget> _tabs = [ |
| 38 | - HomeScreen(), | 37 | + StreamingAsrScreen(), |
| 38 | + VadScreen(), | ||
| 39 | InfoScreen(), | 39 | InfoScreen(), |
| 40 | ]; | 40 | ]; |
| 41 | @override | 41 | @override |
| @@ -52,10 +52,15 @@ class _MyHomePageState extends State<MyHomePage> { | @@ -52,10 +52,15 @@ class _MyHomePageState extends State<MyHomePage> { | ||
| 52 | _currentIndex = index; | 52 | _currentIndex = index; |
| 53 | }); | 53 | }); |
| 54 | }, | 54 | }, |
| 55 | + // https://www.xiconeditor.com/ | ||
| 55 | items: [ | 56 | items: [ |
| 56 | BottomNavigationBarItem( | 57 | BottomNavigationBarItem( |
| 57 | - icon: Icon(Icons.home), | ||
| 58 | - label: 'Home', | 58 | + icon: new Image.asset("assets/streaming-asr.ico"), |
| 59 | + label: '', | ||
| 60 | + ), | ||
| 61 | + BottomNavigationBarItem( | ||
| 62 | + icon: new Image.asset("assets/vad.ico"), | ||
| 63 | + label: '', | ||
| 59 | ), | 64 | ), |
| 60 | BottomNavigationBarItem( | 65 | BottomNavigationBarItem( |
| 61 | icon: Icon(Icons.info), | 66 | icon: Icon(Icons.info), |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:async'; | ||
| 3 | + | ||
| 4 | +import 'package:flutter/foundation.dart'; | ||
| 5 | +import 'package:flutter/material.dart'; | ||
| 6 | +import 'package:path/path.dart' as p; | ||
| 7 | +import 'package:path_provider/path_provider.dart'; | ||
| 8 | +import 'package:record/record.dart'; | ||
| 9 | + | ||
| 10 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 11 | + | ||
| 12 | +import './utils.dart'; | ||
| 13 | + | ||
| 14 | +import './streaming_transducer_asr_test.dart'; // TODO(fangjun): remove it | ||
| 15 | + | ||
| 16 | +Future<sherpa_onnx.OnlineRecognizer> createOnlineRecognizer() async { | ||
| 17 | + var encoder = | ||
| 18 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; | ||
| 19 | + var decoder = | ||
| 20 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; | ||
| 21 | + var joiner = | ||
| 22 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx'; | ||
| 23 | + var tokens = | ||
| 24 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; | ||
| 25 | + | ||
| 26 | + encoder = await copyAssetFile(src: encoder, dst: 'encoder.onnx'); | ||
| 27 | + decoder = await copyAssetFile(src: decoder, dst: 'decoder.onnx'); | ||
| 28 | + joiner = await copyAssetFile(src: joiner, dst: 'joiner.onnx'); | ||
| 29 | + tokens = await copyAssetFile(src: tokens, dst: 'tokens.txt'); | ||
| 30 | + | ||
| 31 | + final transducer = sherpa_onnx.OnlineTransducerModelConfig( | ||
| 32 | + encoder: encoder, | ||
| 33 | + decoder: decoder, | ||
| 34 | + joiner: joiner, | ||
| 35 | + ); | ||
| 36 | + | ||
| 37 | + final modelConfig = sherpa_onnx.OnlineModelConfig( | ||
| 38 | + transducer: transducer, | ||
| 39 | + tokens: tokens, | ||
| 40 | + modelType: 'zipformer', | ||
| 41 | + ); | ||
| 42 | + | ||
| 43 | + final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig); | ||
| 44 | + return sherpa_onnx.OnlineRecognizer(config); | ||
| 45 | +} | ||
| 46 | + | ||
| 47 | +class StreamingAsrScreen extends StatefulWidget { | ||
| 48 | + const StreamingAsrScreen({super.key}); | ||
| 49 | + | ||
| 50 | + @override | ||
| 51 | + State<StreamingAsrScreen> createState() => _StreamingAsrScreenState(); | ||
| 52 | +} | ||
| 53 | + | ||
| 54 | +class _StreamingAsrScreenState extends State<StreamingAsrScreen> { | ||
| 55 | + late final TextEditingController _controller; | ||
| 56 | + late final AudioRecorder _audioRecorder; | ||
| 57 | + | ||
| 58 | + String _title = 'Real-time speech recognition'; | ||
| 59 | + String _last = ''; | ||
| 60 | + int _index = 0; | ||
| 61 | + bool _isInitialized = false; | ||
| 62 | + | ||
| 63 | + sherpa_onnx.OnlineRecognizer? _recognizer; | ||
| 64 | + sherpa_onnx.OnlineStream? _stream; | ||
| 65 | + int _sampleRate = 16000; | ||
| 66 | + | ||
| 67 | + StreamSubscription<RecordState>? _recordSub; | ||
| 68 | + RecordState _recordState = RecordState.stop; | ||
| 69 | + | ||
| 70 | + @override | ||
| 71 | + void initState() { | ||
| 72 | + _audioRecorder = AudioRecorder(); | ||
| 73 | + _controller = TextEditingController(); | ||
| 74 | + | ||
| 75 | + _recordSub = _audioRecorder.onStateChanged().listen((recordState) { | ||
| 76 | + _updateRecordState(recordState); | ||
| 77 | + }); | ||
| 78 | + | ||
| 79 | + super.initState(); | ||
| 80 | + } | ||
| 81 | + | ||
| 82 | + Future<void> _start() async { | ||
| 83 | + if (!_isInitialized) { | ||
| 84 | + sherpa_onnx.initBindings(); | ||
| 85 | + _recognizer = await createOnlineRecognizer(); | ||
| 86 | + _stream = _recognizer?.createStream(); | ||
| 87 | + | ||
| 88 | + _isInitialized = true; | ||
| 89 | + } | ||
| 90 | + | ||
| 91 | + try { | ||
| 92 | + if (await _audioRecorder.hasPermission()) { | ||
| 93 | + const encoder = AudioEncoder.pcm16bits; | ||
| 94 | + | ||
| 95 | + if (!await _isEncoderSupported(encoder)) { | ||
| 96 | + return; | ||
| 97 | + } | ||
| 98 | + | ||
| 99 | + final devs = await _audioRecorder.listInputDevices(); | ||
| 100 | + debugPrint(devs.toString()); | ||
| 101 | + | ||
| 102 | + const config = RecordConfig( | ||
| 103 | + encoder: encoder, | ||
| 104 | + sampleRate: 16000, | ||
| 105 | + numChannels: 1, | ||
| 106 | + ); | ||
| 107 | + | ||
| 108 | + final stream = await _audioRecorder.startStream(config); | ||
| 109 | + | ||
| 110 | + stream.listen( | ||
| 111 | + (data) { | ||
| 112 | + final samplesFloat32 = | ||
| 113 | + convertBytesToFloat32(Uint8List.fromList(data)); | ||
| 114 | + | ||
| 115 | + _stream!.acceptWaveform( | ||
| 116 | + samples: samplesFloat32, sampleRate: _sampleRate); | ||
| 117 | + while (_recognizer!.isReady(_stream!)) { | ||
| 118 | + _recognizer!.decode(_stream!); | ||
| 119 | + } | ||
| 120 | + final text = _recognizer!.getResult(_stream!).text; | ||
| 121 | + String textToDisplay = _last; | ||
| 122 | + if (text != '') { | ||
| 123 | + if (_last == '') { | ||
| 124 | + textToDisplay = '$_index: $text'; | ||
| 125 | + } else { | ||
| 126 | + textToDisplay = '$_index: $text\n$_last'; | ||
| 127 | + } | ||
| 128 | + } | ||
| 129 | + | ||
| 130 | + if (_recognizer!.isEndpoint(_stream!)) { | ||
| 131 | + _recognizer!.reset(_stream!); | ||
| 132 | + if (text != '') { | ||
| 133 | + _last = textToDisplay; | ||
| 134 | + _index += 1; | ||
| 135 | + } | ||
| 136 | + } | ||
| 137 | + print('text: $textToDisplay'); | ||
| 138 | + | ||
| 139 | + _controller.value = TextEditingValue( | ||
| 140 | + text: textToDisplay, | ||
| 141 | + selection: TextSelection.collapsed(offset: textToDisplay.length), | ||
| 142 | + ); | ||
| 143 | + }, | ||
| 144 | + onDone: () { | ||
| 145 | + print('stream stopped.'); | ||
| 146 | + }, | ||
| 147 | + ); | ||
| 148 | + } | ||
| 149 | + } catch (e) { | ||
| 150 | + print(e); | ||
| 151 | + } | ||
| 152 | + } | ||
| 153 | + | ||
| 154 | + Future<void> _stop() async { | ||
| 155 | + _stream!.free(); | ||
| 156 | + _stream = _recognizer!.createStream(); | ||
| 157 | + | ||
| 158 | + await _audioRecorder.stop(); | ||
| 159 | + } | ||
| 160 | + | ||
| 161 | + Future<void> _pause() => _audioRecorder.pause(); | ||
| 162 | + | ||
| 163 | + Future<void> _resume() => _audioRecorder.resume(); | ||
| 164 | + | ||
| 165 | + void _updateRecordState(RecordState recordState) { | ||
| 166 | + setState(() => _recordState = recordState); | ||
| 167 | + } | ||
| 168 | + | ||
| 169 | + Future<bool> _isEncoderSupported(AudioEncoder encoder) async { | ||
| 170 | + final isSupported = await _audioRecorder.isEncoderSupported( | ||
| 171 | + encoder, | ||
| 172 | + ); | ||
| 173 | + | ||
| 174 | + if (!isSupported) { | ||
| 175 | + debugPrint('${encoder.name} is not supported on this platform.'); | ||
| 176 | + debugPrint('Supported encoders are:'); | ||
| 177 | + | ||
| 178 | + for (final e in AudioEncoder.values) { | ||
| 179 | + if (await _audioRecorder.isEncoderSupported(e)) { | ||
| 180 | + debugPrint('- ${encoder.name}'); | ||
| 181 | + } | ||
| 182 | + } | ||
| 183 | + } | ||
| 184 | + | ||
| 185 | + return isSupported; | ||
| 186 | + } | ||
| 187 | + | ||
| 188 | + @override | ||
| 189 | + Widget build(BuildContext context) { | ||
| 190 | + return MaterialApp( | ||
| 191 | + home: Scaffold( | ||
| 192 | + body: Column( | ||
| 193 | + mainAxisAlignment: MainAxisAlignment.center, | ||
| 194 | + children: [ | ||
| 195 | + Text(_title), | ||
| 196 | + const SizedBox(height: 50), | ||
| 197 | + TextField( | ||
| 198 | + maxLines: 5, | ||
| 199 | + controller: _controller, | ||
| 200 | + readOnly: true, | ||
| 201 | + ), | ||
| 202 | + const SizedBox(height: 50), | ||
| 203 | + Row( | ||
| 204 | + mainAxisAlignment: MainAxisAlignment.center, | ||
| 205 | + children: <Widget>[ | ||
| 206 | + _buildRecordStopControl(), | ||
| 207 | + const SizedBox(width: 20), | ||
| 208 | + _buildText(), | ||
| 209 | + ], | ||
| 210 | + ), | ||
| 211 | + ], | ||
| 212 | + ), | ||
| 213 | + ), | ||
| 214 | + ); | ||
| 215 | + } | ||
| 216 | + | ||
| 217 | + @override | ||
| 218 | + void dispose() { | ||
| 219 | + _recordSub?.cancel(); | ||
| 220 | + _audioRecorder.dispose(); | ||
| 221 | + _stream?.free(); | ||
| 222 | + _recognizer?.free(); | ||
| 223 | + super.dispose(); | ||
| 224 | + } | ||
| 225 | + | ||
| 226 | + Widget _buildRecordStopControl() { | ||
| 227 | + late Icon icon; | ||
| 228 | + late Color color; | ||
| 229 | + | ||
| 230 | + if (_recordState != RecordState.stop) { | ||
| 231 | + icon = const Icon(Icons.stop, color: Colors.red, size: 30); | ||
| 232 | + color = Colors.red.withOpacity(0.1); | ||
| 233 | + } else { | ||
| 234 | + final theme = Theme.of(context); | ||
| 235 | + icon = Icon(Icons.mic, color: theme.primaryColor, size: 30); | ||
| 236 | + color = theme.primaryColor.withOpacity(0.1); | ||
| 237 | + } | ||
| 238 | + | ||
| 239 | + return ClipOval( | ||
| 240 | + child: Material( | ||
| 241 | + color: color, | ||
| 242 | + child: InkWell( | ||
| 243 | + child: SizedBox(width: 56, height: 56, child: icon), | ||
| 244 | + onTap: () { | ||
| 245 | + (_recordState != RecordState.stop) ? _stop() : _start(); | ||
| 246 | + }, | ||
| 247 | + ), | ||
| 248 | + ), | ||
| 249 | + ); | ||
| 250 | + } | ||
| 251 | + | ||
| 252 | + Widget _buildText() { | ||
| 253 | + if (_recordState == RecordState.stop) { | ||
| 254 | + return const Text("Start"); | ||
| 255 | + } else { | ||
| 256 | + return const Text("Stop"); | ||
| 257 | + } | ||
| 258 | + } | ||
| 259 | +} |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'package:path/path.dart'; | ||
| 3 | +import 'package:path_provider/path_provider.dart'; | ||
| 4 | +import 'package:flutter/services.dart' show rootBundle; | ||
| 5 | +import 'dart:typed_data'; | ||
| 6 | +import "dart:io"; | ||
| 7 | + | ||
| 8 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 9 | +import './utils.dart'; | ||
| 10 | + | ||
| 11 | +Future<void> testStreamingTransducerAsr() async { | ||
| 12 | + var encoder = | ||
| 13 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; | ||
| 14 | + var decoder = | ||
| 15 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; | ||
| 16 | + var joiner = | ||
| 17 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx'; | ||
| 18 | + var tokens = | ||
| 19 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; | ||
| 20 | + | ||
| 21 | + var testWave = | ||
| 22 | + 'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; | ||
| 23 | + | ||
| 24 | + encoder = await copyAssetFile(src: encoder, dst: 'encoder.onnx'); | ||
| 25 | + decoder = await copyAssetFile(src: decoder, dst: 'decoder.onnx'); | ||
| 26 | + joiner = await copyAssetFile(src: joiner, dst: 'joiner.onnx'); | ||
| 27 | + tokens = await copyAssetFile(src: tokens, dst: 'tokens.txt'); | ||
| 28 | + testWave = await copyAssetFile(src: testWave, dst: 'test.wav'); | ||
| 29 | + | ||
| 30 | + final transducer = sherpa_onnx.OnlineTransducerModelConfig( | ||
| 31 | + encoder: encoder, | ||
| 32 | + decoder: decoder, | ||
| 33 | + joiner: joiner, | ||
| 34 | + ); | ||
| 35 | + | ||
| 36 | + final modelConfig = sherpa_onnx.OnlineModelConfig( | ||
| 37 | + transducer: transducer, | ||
| 38 | + tokens: tokens, | ||
| 39 | + modelType: 'zipformer', | ||
| 40 | + ); | ||
| 41 | + | ||
| 42 | + final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig); | ||
| 43 | + print(config); | ||
| 44 | + final recognizer = sherpa_onnx.OnlineRecognizer(config); | ||
| 45 | + | ||
| 46 | + final waveData = sherpa_onnx.readWave(testWave); | ||
| 47 | + final stream = recognizer.createStream(); | ||
| 48 | + | ||
| 49 | + stream.acceptWaveform( | ||
| 50 | + samples: waveData.samples, sampleRate: waveData.sampleRate); | ||
| 51 | + while (recognizer.isReady(stream)) { | ||
| 52 | + recognizer.decode(stream); | ||
| 53 | + } | ||
| 54 | + | ||
| 55 | + final result = recognizer.getResult(stream); | ||
| 56 | + print('result is: ${result}'); | ||
| 57 | + | ||
| 58 | + print('recognizer: ${recognizer.ptr}'); | ||
| 59 | + stream.free(); | ||
| 60 | + recognizer.free(); | ||
| 61 | +} |
| @@ -11,14 +11,14 @@ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | @@ -11,14 +11,14 @@ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 11 | 11 | ||
| 12 | import './utils.dart'; | 12 | import './utils.dart'; |
| 13 | 13 | ||
| 14 | -class HomeScreen extends StatefulWidget { | ||
| 15 | - const HomeScreen({super.key}); | 14 | +class VadScreen extends StatefulWidget { |
| 15 | + const VadScreen({super.key}); | ||
| 16 | 16 | ||
| 17 | @override | 17 | @override |
| 18 | - State<HomeScreen> createState() => _HomeScreenState(); | 18 | + State<VadScreen> createState() => _VadScreenState(); |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | -class _HomeScreenState extends State<HomeScreen> { | 21 | +class _VadScreenState extends State<VadScreen> { |
| 22 | late final AudioRecorder _audioRecorder; | 22 | late final AudioRecorder _audioRecorder; |
| 23 | 23 | ||
| 24 | bool _printed = false; | 24 | bool _printed = false; |
| @@ -73,6 +73,7 @@ flutter: | @@ -73,6 +73,7 @@ flutter: | ||
| 73 | # To add assets to your application, add an assets section, like this: | 73 | # To add assets to your application, add an assets section, like this: |
| 74 | assets: | 74 | assets: |
| 75 | - assets/ | 75 | - assets/ |
| 76 | + - assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/ | ||
| 76 | # - assets/sr-data/enroll/ | 77 | # - assets/sr-data/enroll/ |
| 77 | # - assets/sr-data/test/ | 78 | # - assets/sr-data/test/ |
| 78 | # - images/a_dot_ham.jpeg | 79 | # - images/a_dot_ham.jpeg |
| @@ -2,6 +2,7 @@ | @@ -2,6 +2,7 @@ | ||
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | import 'dart:ffi'; | 3 | import 'dart:ffi'; |
| 4 | 4 | ||
| 5 | +export 'src/online_recognizer.dart'; | ||
| 5 | export 'src/online_stream.dart'; | 6 | export 'src/online_stream.dart'; |
| 6 | export 'src/speaker_identification.dart'; | 7 | export 'src/speaker_identification.dart'; |
| 7 | export 'src/vad.dart'; | 8 | export 'src/vad.dart'; |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:convert'; | ||
| 3 | +import 'dart:ffi'; | ||
| 4 | +import 'dart:typed_data'; | ||
| 5 | + | ||
| 6 | +import 'package:ffi/ffi.dart'; | ||
| 7 | + | ||
| 8 | +import './online_stream.dart'; | ||
| 9 | +import './sherpa_onnx_bindings.dart'; | ||
| 10 | + | ||
| 11 | +class FeatureConfig { | ||
| 12 | + const FeatureConfig({this.sampleRate = 16000, this.featureDim = 80}); | ||
| 13 | + | ||
| 14 | + @override | ||
| 15 | + String toString() { | ||
| 16 | + return 'FeatureConfig(sampleRate: $sampleRate, featureDim: $featureDim)'; | ||
| 17 | + } | ||
| 18 | + | ||
| 19 | + final int sampleRate; | ||
| 20 | + final int featureDim; | ||
| 21 | +} | ||
| 22 | + | ||
| 23 | +class OnlineTransducerModelConfig { | ||
| 24 | + const OnlineTransducerModelConfig({ | ||
| 25 | + this.encoder = '', | ||
| 26 | + this.decoder = '', | ||
| 27 | + this.joiner = '', | ||
| 28 | + }); | ||
| 29 | + | ||
| 30 | + @override | ||
| 31 | + String toString() { | ||
| 32 | + return 'OnlineTransducerModelConfig(encoder: $encoder, decoder: $decoder, joiner: $joiner)'; | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + final String encoder; | ||
| 36 | + final String decoder; | ||
| 37 | + final String joiner; | ||
| 38 | +} | ||
| 39 | + | ||
| 40 | +class OnlineParaformerModelConfig { | ||
| 41 | + const OnlineParaformerModelConfig({this.encoder = '', this.decoder = ''}); | ||
| 42 | + | ||
| 43 | + @override | ||
| 44 | + String toString() { | ||
| 45 | + return 'OnlineParaformerModelConfig(encoder: $encoder, decoder: $decoder)'; | ||
| 46 | + } | ||
| 47 | + | ||
| 48 | + final String encoder; | ||
| 49 | + final String decoder; | ||
| 50 | +} | ||
| 51 | + | ||
| 52 | +class OnlineZipformer2CtcModelConfig { | ||
| 53 | + const OnlineZipformer2CtcModelConfig({this.model = ''}); | ||
| 54 | + | ||
| 55 | + @override | ||
| 56 | + String toString() { | ||
| 57 | + return 'OnlineZipformer2CtcModelConfig(model: $model)'; | ||
| 58 | + } | ||
| 59 | + | ||
| 60 | + final String model; | ||
| 61 | +} | ||
| 62 | + | ||
| 63 | +class OnlineModelConfig { | ||
| 64 | + const OnlineModelConfig({ | ||
| 65 | + this.transducer = const OnlineTransducerModelConfig(), | ||
| 66 | + this.paraformer = const OnlineParaformerModelConfig(), | ||
| 67 | + this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(), | ||
| 68 | + required this.tokens, | ||
| 69 | + this.numThreads = 1, | ||
| 70 | + this.provider = 'cpu', | ||
| 71 | + this.debug = true, | ||
| 72 | + this.modelType = '', | ||
| 73 | + }); | ||
| 74 | + | ||
| 75 | + @override | ||
| 76 | + String toString() { | ||
| 77 | + return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType)'; | ||
| 78 | + } | ||
| 79 | + | ||
| 80 | + final OnlineTransducerModelConfig transducer; | ||
| 81 | + final OnlineParaformerModelConfig paraformer; | ||
| 82 | + final OnlineZipformer2CtcModelConfig zipformer2Ctc; | ||
| 83 | + | ||
| 84 | + final String tokens; | ||
| 85 | + | ||
| 86 | + final int numThreads; | ||
| 87 | + | ||
| 88 | + final String provider; | ||
| 89 | + | ||
| 90 | + final bool debug; | ||
| 91 | + | ||
| 92 | + final String modelType; | ||
| 93 | +} | ||
| 94 | + | ||
| 95 | +class OnlineCtcFstDecoderConfig { | ||
| 96 | + const OnlineCtcFstDecoderConfig({this.graph = '', this.maxActive = 3000}); | ||
| 97 | + | ||
| 98 | + @override | ||
| 99 | + String toString() { | ||
| 100 | + return 'OnlineCtcFstDecoderConfig(graph: $graph, maxActive: $maxActive)'; | ||
| 101 | + } | ||
| 102 | + | ||
| 103 | + final String graph; | ||
| 104 | + final int maxActive; | ||
| 105 | +} | ||
| 106 | + | ||
| 107 | +class OnlineRecognizerConfig { | ||
| 108 | + const OnlineRecognizerConfig({ | ||
| 109 | + this.feat = const FeatureConfig(), | ||
| 110 | + required this.model, | ||
| 111 | + this.decodingMethod = 'greedy_search', | ||
| 112 | + this.maxActivePaths = 4, | ||
| 113 | + this.enableEndpoint = true, | ||
| 114 | + this.rule1MinTrailingSilence = 2.4, | ||
| 115 | + this.rule2MinTrailingSilence = 1.2, | ||
| 116 | + this.rule3MinUtteranceLength = 20, | ||
| 117 | + this.hotwordsFile = '', | ||
| 118 | + this.hotwordsScore = 1.5, | ||
| 119 | + this.ctcFstDecoderConfig = const OnlineCtcFstDecoderConfig(), | ||
| 120 | + }); | ||
| 121 | + | ||
| 122 | + @override | ||
| 123 | + String toString() { | ||
| 124 | + return 'OnlineRecognizerConfig(feat: $feat, model: $model, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, enableEndpoint: $enableEndpoint, rule1MinTrailingSilence: $rule1MinTrailingSilence, rule2MinTrailingSilence: $rule2MinTrailingSilence, rule3MinUtteranceLength: $rule3MinUtteranceLength, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ctcFstDecoderConfig: $ctcFstDecoderConfig)'; | ||
| 125 | + } | ||
| 126 | + | ||
| 127 | + final FeatureConfig feat; | ||
| 128 | + final OnlineModelConfig model; | ||
| 129 | + final String decodingMethod; | ||
| 130 | + | ||
| 131 | + final int maxActivePaths; | ||
| 132 | + | ||
| 133 | + final bool enableEndpoint; | ||
| 134 | + | ||
| 135 | + final double rule1MinTrailingSilence; | ||
| 136 | + | ||
| 137 | + final double rule2MinTrailingSilence; | ||
| 138 | + | ||
| 139 | + final double rule3MinUtteranceLength; | ||
| 140 | + | ||
| 141 | + final String hotwordsFile; | ||
| 142 | + | ||
| 143 | + final double hotwordsScore; | ||
| 144 | + | ||
| 145 | + final OnlineCtcFstDecoderConfig ctcFstDecoderConfig; | ||
| 146 | +} | ||
| 147 | + | ||
| 148 | +class OnlineRecognizerResult { | ||
| 149 | + OnlineRecognizerResult( | ||
| 150 | + {required this.text, required this.tokens, required this.timestamps}); | ||
| 151 | + | ||
| 152 | + @override | ||
| 153 | + String toString() { | ||
| 154 | + return 'OnlineRecognizerResult(text: $text, tokens: $tokens, timestamps: $timestamps)'; | ||
| 155 | + } | ||
| 156 | + | ||
| 157 | + final String text; | ||
| 158 | + final List<String> tokens; | ||
| 159 | + final List<double> timestamps; | ||
| 160 | +} | ||
| 161 | + | ||
| 162 | +class OnlineRecognizer { | ||
| 163 | + OnlineRecognizer._({required this.ptr, required this.config}); | ||
| 164 | + | ||
| 165 | + /// The user is responsible to call the OnlineRecognizer.free() | ||
| 166 | + /// method of the returned instance to avoid memory leak. | ||
| 167 | + factory OnlineRecognizer(OnlineRecognizerConfig config) { | ||
| 168 | + final c = calloc<SherpaOnnxOnlineRecognizerConfig>(); | ||
| 169 | + c.ref.feat.sampleRate = config.feat.sampleRate; | ||
| 170 | + c.ref.feat.featureDim = config.feat.featureDim; | ||
| 171 | + | ||
| 172 | + // transducer | ||
| 173 | + c.ref.model.transducer.encoder = | ||
| 174 | + config.model.transducer.encoder.toNativeUtf8(); | ||
| 175 | + c.ref.model.transducer.decoder = | ||
| 176 | + config.model.transducer.decoder.toNativeUtf8(); | ||
| 177 | + c.ref.model.transducer.joiner = | ||
| 178 | + config.model.transducer.joiner.toNativeUtf8(); | ||
| 179 | + | ||
| 180 | + // paraformer | ||
| 181 | + c.ref.model.paraformer.encoder = | ||
| 182 | + config.model.paraformer.encoder.toNativeUtf8(); | ||
| 183 | + c.ref.model.paraformer.decoder = | ||
| 184 | + config.model.paraformer.decoder.toNativeUtf8(); | ||
| 185 | + | ||
| 186 | + // zipformer2Ctc | ||
| 187 | + c.ref.model.zipformer2Ctc.model = | ||
| 188 | + config.model.zipformer2Ctc.model.toNativeUtf8(); | ||
| 189 | + | ||
| 190 | + c.ref.model.tokens = config.model.tokens.toNativeUtf8(); | ||
| 191 | + c.ref.model.numThreads = config.model.numThreads; | ||
| 192 | + c.ref.model.provider = config.model.provider.toNativeUtf8(); | ||
| 193 | + c.ref.model.debug = config.model.debug ? 1 : 0; | ||
| 194 | + c.ref.model.modelType = config.model.modelType.toNativeUtf8(); | ||
| 195 | + | ||
| 196 | + c.ref.decodingMethod = config.decodingMethod.toNativeUtf8(); | ||
| 197 | + c.ref.maxActivePaths = config.maxActivePaths; | ||
| 198 | + c.ref.enableEndpoint = config.enableEndpoint ? 1 : 0; | ||
| 199 | + c.ref.rule1MinTrailingSilence = config.rule1MinTrailingSilence; | ||
| 200 | + c.ref.rule2MinTrailingSilence = config.rule2MinTrailingSilence; | ||
| 201 | + c.ref.rule3MinUtteranceLength = config.rule3MinUtteranceLength; | ||
| 202 | + c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8(); | ||
| 203 | + c.ref.hotwordsScore = config.hotwordsScore; | ||
| 204 | + | ||
| 205 | + c.ref.ctcFstDecoderConfig.graph = | ||
| 206 | + config.ctcFstDecoderConfig.graph.toNativeUtf8(); | ||
| 207 | + c.ref.ctcFstDecoderConfig.maxActive = config.ctcFstDecoderConfig.maxActive; | ||
| 208 | + | ||
| 209 | + final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr; | ||
| 210 | + | ||
| 211 | + calloc.free(c.ref.ctcFstDecoderConfig.graph); | ||
| 212 | + calloc.free(c.ref.hotwordsFile); | ||
| 213 | + calloc.free(c.ref.decodingMethod); | ||
| 214 | + calloc.free(c.ref.model.modelType); | ||
| 215 | + calloc.free(c.ref.model.provider); | ||
| 216 | + calloc.free(c.ref.model.tokens); | ||
| 217 | + calloc.free(c.ref.model.zipformer2Ctc.model); | ||
| 218 | + calloc.free(c.ref.model.paraformer.encoder); | ||
| 219 | + calloc.free(c.ref.model.paraformer.decoder); | ||
| 220 | + | ||
| 221 | + calloc.free(c.ref.model.transducer.encoder); | ||
| 222 | + calloc.free(c.ref.model.transducer.decoder); | ||
| 223 | + calloc.free(c.ref.model.transducer.joiner); | ||
| 224 | + calloc.free(c); | ||
| 225 | + | ||
| 226 | + return OnlineRecognizer._(ptr: ptr, config: config); | ||
| 227 | + } | ||
| 228 | + | ||
  /// Releases the underlying native recognizer and resets [ptr] to
  /// nullptr. The instance must not be used after this call.
  void free() {
    SherpaOnnxBindings.destroyOnlineRecognizer?.call(ptr);
    ptr = nullptr;
  }
| 233 | + | ||
| 234 | + /// The user has to invoke stream.free() on the returned instance | ||
| 235 | + /// to avoid memory leak | ||
| 236 | + OnlineStream createStream({String hotwords = ''}) { | ||
| 237 | + if (hotwords == '') { | ||
| 238 | + final p = SherpaOnnxBindings.createOnlineStream?.call(ptr) ?? nullptr; | ||
| 239 | + return OnlineStream(ptr: p); | ||
| 240 | + } | ||
| 241 | + | ||
| 242 | + final utf8 = hotwords.toNativeUtf8(); | ||
| 243 | + final p = | ||
| 244 | + SherpaOnnxBindings.createOnlineStreamWithHotwords?.call(ptr, utf8) ?? | ||
| 245 | + nullptr; | ||
| 246 | + calloc.free(utf8); | ||
| 247 | + return OnlineStream(ptr: p); | ||
| 248 | + } | ||
| 249 | + | ||
| 250 | + bool isReady(OnlineStream stream) { | ||
| 251 | + int ready = | ||
| 252 | + SherpaOnnxBindings.isOnlineStreamReady?.call(ptr, stream.ptr) ?? 0; | ||
| 253 | + | ||
| 254 | + return ready == 1; | ||
| 255 | + } | ||
| 256 | + | ||
| 257 | + OnlineRecognizerResult getResult(OnlineStream stream) { | ||
| 258 | + final json = | ||
| 259 | + SherpaOnnxBindings.getOnlineStreamResultAsJson?.call(ptr, stream.ptr) ?? | ||
| 260 | + nullptr; | ||
| 261 | + if (json == null) { | ||
| 262 | + return OnlineRecognizerResult(text: '', tokens: [], timestamps: []); | ||
| 263 | + } | ||
| 264 | + | ||
| 265 | + final parsedJson = jsonDecode(json.toDartString()); | ||
| 266 | + | ||
| 267 | + SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json); | ||
| 268 | + | ||
| 269 | + return OnlineRecognizerResult( | ||
| 270 | + text: parsedJson['text'], | ||
| 271 | + tokens: List<String>.from(parsedJson['tokens']), | ||
| 272 | + timestamps: List<double>.from(parsedJson['timestamps'])); | ||
| 273 | + } | ||
| 274 | + | ||
  /// Resets [stream], clearing its decoding state on the native side.
  void reset(OnlineStream stream) {
    SherpaOnnxBindings.reset?.call(ptr, stream.ptr);
  }
| 278 | + | ||
  /// Runs one decoding step on [stream]. Call only when isReady()
  /// returns true for the stream.
  void decode(OnlineStream stream) {
    SherpaOnnxBindings.decodeOnlineStream?.call(ptr, stream.ptr);
  }
| 282 | + | ||
| 283 | + bool isEndpoint(OnlineStream stream) { | ||
| 284 | + int yes = SherpaOnnxBindings.isEndpoint?.call(ptr, stream.ptr) ?? 0; | ||
| 285 | + | ||
| 286 | + return yes == 1; | ||
| 287 | + } | ||
| 288 | + | ||
| 289 | + Pointer<SherpaOnnxOnlineRecognizer> ptr; | ||
| 290 | + OnlineRecognizerConfig config; | ||
| 291 | +} |
| @@ -2,6 +2,82 @@ | @@ -2,6 +2,82 @@ | ||
| 2 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | import 'package:ffi/ffi.dart'; | 3 | import 'package:ffi/ffi.dart'; |
| 4 | 4 | ||
/// Dart mirror of the native `SherpaOnnxFeatureConfig` C struct.
/// Field order and types must match the native layout exactly.
final class SherpaOnnxFeatureConfig extends Struct {
  // Sample rate used for feature extraction (Hz).
  @Int32()
  external int sampleRate;

  // Dimension of the extracted feature vectors.
  @Int32()
  external int featureDim;
}
| 12 | + | ||
/// Dart mirror of the native `SherpaOnnxOnlineTransducerModelConfig`
/// C struct: file paths of the three transducer model components.
final class SherpaOnnxOnlineTransducerModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
  external Pointer<Utf8> joiner;
}
| 18 | + | ||
/// Dart mirror of the native `SherpaOnnxOnlineParaformerModelConfig`
/// C struct: file paths of the paraformer encoder/decoder models.
final class SherpaOnnxOnlineParaformerModelConfig extends Struct {
  external Pointer<Utf8> encoder;
  external Pointer<Utf8> decoder;
}
| 23 | + | ||
/// Dart mirror of the native `SherpaOnnxOnlineZipformer2CtcModelConfig`
/// C struct: file path of the zipformer2 CTC model.
final class SherpaOnnxOnlineZipformer2CtcModelConfig extends Struct {
  external Pointer<Utf8> model;
}
| 27 | + | ||
/// Dart mirror of the native `SherpaOnnxOnlineModelConfig` C struct.
/// Exactly one of the nested model configs is expected to be populated
/// — presumably; confirm against the native c-api header.
final class SherpaOnnxOnlineModelConfig extends Struct {
  external SherpaOnnxOnlineTransducerModelConfig transducer;
  external SherpaOnnxOnlineParaformerModelConfig paraformer;
  external SherpaOnnxOnlineZipformer2CtcModelConfig zipformer2Ctc;

  // Path to the tokens file shared by all model types.
  external Pointer<Utf8> tokens;

  @Int32()
  external int numThreads;

  // Execution provider name, e.g. as passed through from the Dart config.
  external Pointer<Utf8> provider;

  // Non-zero enables native-side debug logging (C-style bool).
  @Int32()
  external int debug;

  external Pointer<Utf8> modelType;
}
| 45 | + | ||
/// Dart mirror of the native `SherpaOnnxOnlineCtcFstDecoderConfig`
/// C struct.
final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct {
  // Path to the decoding graph (FST) file.
  external Pointer<Utf8> graph;

  @Int32()
  external int maxActive;
}
| 52 | + | ||
/// Dart mirror of the native `SherpaOnnxOnlineRecognizerConfig` C
/// struct. Field order and types must match the native layout exactly.
final class SherpaOnnxOnlineRecognizerConfig extends Struct {
  external SherpaOnnxFeatureConfig feat;
  external SherpaOnnxOnlineModelConfig model;
  external Pointer<Utf8> decodingMethod;

  @Int32()
  external int maxActivePaths;

  // Non-zero enables endpoint detection (C-style bool).
  @Int32()
  external int enableEndpoint;

  // Endpointing rule thresholds — presumably in seconds; confirm
  // against the native c-api header.
  @Float()
  external double rule1MinTrailingSilence;

  @Float()
  external double rule2MinTrailingSilence;

  @Float()
  external double rule3MinUtteranceLength;

  external Pointer<Utf8> hotwordsFile;

  @Float()
  external double hotwordsScore;

  external SherpaOnnxOnlineCtcFstDecoderConfig ctcFstDecoderConfig;
}
| 80 | + | ||
| 5 | final class SherpaOnnxSileroVadModelConfig extends Struct { | 81 | final class SherpaOnnxSileroVadModelConfig extends Struct { |
| 6 | external Pointer<Utf8> model; | 82 | external Pointer<Utf8> model; |
| 7 | 83 | ||
| @@ -71,10 +147,66 @@ final class SherpaOnnxVoiceActivityDetector extends Opaque {} | @@ -71,10 +147,66 @@ final class SherpaOnnxVoiceActivityDetector extends Opaque {} | ||
| 71 | 147 | ||
| 72 | final class SherpaOnnxOnlineStream extends Opaque {} | 148 | final class SherpaOnnxOnlineStream extends Opaque {} |
| 73 | 149 | ||
/// Opaque handle to the native online (streaming) recognizer object.
final class SherpaOnnxOnlineRecognizer extends Opaque {}
| 151 | + | ||
| 74 | final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | 152 | final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} |
| 75 | 153 | ||
| 76 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} | 154 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} |
| 77 | 155 | ||
// ---------------------------------------------------------------------------
// Function typedefs for the online (streaming) recognizer C API.
// The *Native variants describe the C-side signatures; the plain
// variants are the Dart-side signatures produced by asFunction().
//
// NOTE(review): `Reset` and `IsEndpoint` are very generic top-level
// names, unlike the `SherpaOnnx*` prefix used elsewhere in this file;
// consider prefixing them to avoid clashes with other libraries.
// ---------------------------------------------------------------------------

typedef CreateOnlineRecognizerNative = Pointer<SherpaOnnxOnlineRecognizer>
    Function(Pointer<SherpaOnnxOnlineRecognizerConfig>);

typedef CreateOnlineRecognizer = CreateOnlineRecognizerNative;

typedef DestroyOnlineRecognizerNative = Void Function(
    Pointer<SherpaOnnxOnlineRecognizer>);

typedef DestroyOnlineRecognizer = void Function(
    Pointer<SherpaOnnxOnlineRecognizer>);

typedef CreateOnlineStreamNative = Pointer<SherpaOnnxOnlineStream> Function(
    Pointer<SherpaOnnxOnlineRecognizer>);

typedef CreateOnlineStream = CreateOnlineStreamNative;

typedef CreateOnlineStreamWithHotwordsNative = Pointer<SherpaOnnxOnlineStream>
    Function(Pointer<SherpaOnnxOnlineRecognizer>, Pointer<Utf8>);

typedef CreateOnlineStreamWithHotwords = CreateOnlineStreamWithHotwordsNative;

typedef IsOnlineStreamReadyNative = Int32 Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef IsOnlineStreamReady = int Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef DecodeOnlineStreamNative = Void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef DecodeOnlineStream = void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef GetOnlineStreamResultAsJsonNative = Pointer<Utf8> Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef GetOnlineStreamResultAsJson = GetOnlineStreamResultAsJsonNative;

typedef ResetNative = Void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef Reset = void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef IsEndpointNative = Int32 Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

typedef IsEndpoint = int Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

// Frees the JSON string returned by GetOnlineStreamResultAsJson.
typedef DestroyOnlineStreamResultJsonNative = Void Function(Pointer<Utf8>);

typedef DestroyOnlineStreamResultJson = void Function(Pointer<Utf8>);
| 78 | typedef SherpaOnnxCreateVoiceActivityDetectorNative | 210 | typedef SherpaOnnxCreateVoiceActivityDetectorNative |
| 79 | = Pointer<SherpaOnnxVoiceActivityDetector> Function( | 211 | = Pointer<SherpaOnnxVoiceActivityDetector> Function( |
| 80 | Pointer<SherpaOnnxVadModelConfig>, Float); | 212 | Pointer<SherpaOnnxVadModelConfig>, Float); |
| @@ -356,6 +488,26 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | @@ -356,6 +488,26 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | ||
| 356 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); | 488 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); |
| 357 | 489 | ||
| 358 | class SherpaOnnxBindings { | 490 | class SherpaOnnxBindings { |
  // --- Online (streaming) recognizer bindings ---
  // Each function pointer stays null until init() resolves the
  // corresponding symbol from the loaded dynamic library, which is why
  // all call sites use the null-aware `?.call(...)` form.

  static CreateOnlineRecognizer? createOnlineRecognizer;

  static DestroyOnlineRecognizer? destroyOnlineRecognizer;

  static CreateOnlineStream? createOnlineStream;

  static CreateOnlineStreamWithHotwords? createOnlineStreamWithHotwords;

  static IsOnlineStreamReady? isOnlineStreamReady;

  static DecodeOnlineStream? decodeOnlineStream;

  static GetOnlineStreamResultAsJson? getOnlineStreamResultAsJson;

  static Reset? reset;

  static IsEndpoint? isEndpoint;

  static DestroyOnlineStreamResultJson? destroyOnlineStreamResultJson;
| 510 | + | ||
| 359 | static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector; | 511 | static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector; |
| 360 | 512 | ||
| 361 | static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector; | 513 | static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector; |
| @@ -459,6 +611,52 @@ class SherpaOnnxBindings { | @@ -459,6 +611,52 @@ class SherpaOnnxBindings { | ||
| 459 | static SherpaOnnxFreeWave? freeWave; | 611 | static SherpaOnnxFreeWave? freeWave; |
| 460 | 612 | ||
| 461 | static void init(DynamicLibrary dynamicLibrary) { | 613 | static void init(DynamicLibrary dynamicLibrary) { |
| 614 | + createOnlineRecognizer ??= dynamicLibrary | ||
| 615 | + .lookup<NativeFunction<CreateOnlineRecognizerNative>>( | ||
| 616 | + 'CreateOnlineRecognizer') | ||
| 617 | + .asFunction(); | ||
| 618 | + | ||
| 619 | + destroyOnlineRecognizer ??= dynamicLibrary | ||
| 620 | + .lookup<NativeFunction<DestroyOnlineRecognizerNative>>( | ||
| 621 | + 'DestroyOnlineRecognizer') | ||
| 622 | + .asFunction(); | ||
| 623 | + | ||
| 624 | + createOnlineStream ??= dynamicLibrary | ||
| 625 | + .lookup<NativeFunction<CreateOnlineStreamNative>>('CreateOnlineStream') | ||
| 626 | + .asFunction(); | ||
| 627 | + | ||
| 628 | + createOnlineStreamWithHotwords ??= dynamicLibrary | ||
| 629 | + .lookup<NativeFunction<CreateOnlineStreamWithHotwordsNative>>( | ||
| 630 | + 'CreateOnlineStreamWithHotwords') | ||
| 631 | + .asFunction(); | ||
| 632 | + | ||
| 633 | + isOnlineStreamReady ??= dynamicLibrary | ||
| 634 | + .lookup<NativeFunction<IsOnlineStreamReadyNative>>( | ||
| 635 | + 'IsOnlineStreamReady') | ||
| 636 | + .asFunction(); | ||
| 637 | + | ||
| 638 | + decodeOnlineStream ??= dynamicLibrary | ||
| 639 | + .lookup<NativeFunction<DecodeOnlineStreamNative>>('DecodeOnlineStream') | ||
| 640 | + .asFunction(); | ||
| 641 | + | ||
| 642 | + getOnlineStreamResultAsJson ??= dynamicLibrary | ||
| 643 | + .lookup<NativeFunction<GetOnlineStreamResultAsJsonNative>>( | ||
| 644 | + 'GetOnlineStreamResultAsJson') | ||
| 645 | + .asFunction(); | ||
| 646 | + | ||
| 647 | + reset ??= dynamicLibrary | ||
| 648 | + .lookup<NativeFunction<ResetNative>>('Reset') | ||
| 649 | + .asFunction(); | ||
| 650 | + | ||
| 651 | + isEndpoint ??= dynamicLibrary | ||
| 652 | + .lookup<NativeFunction<IsEndpointNative>>('IsEndpoint') | ||
| 653 | + .asFunction(); | ||
| 654 | + | ||
| 655 | + destroyOnlineStreamResultJson ??= dynamicLibrary | ||
| 656 | + .lookup<NativeFunction<DestroyOnlineStreamResultJsonNative>>( | ||
| 657 | + 'DestroyOnlineStreamResultJson') | ||
| 658 | + .asFunction(); | ||
| 659 | + | ||
| 462 | createVoiceActivityDetector ??= dynamicLibrary | 660 | createVoiceActivityDetector ??= dynamicLibrary |
| 463 | .lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>( | 661 | .lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>( |
| 464 | 'SherpaOnnxCreateVoiceActivityDetector') | 662 | 'SherpaOnnxCreateVoiceActivityDetector') |
-
请注册或登录后发表评论