Add Dart API for spoken language identification (#2596)

Committed by GitHub

Showing 9 changed files with 506 additions and 0 deletions.
**README.md** (new file)

```md
# Introduction

This example shows how to use the Dart API from sherpa-onnx for spoken language identification.

| File | Description |
|------|-------------|
| [./bin/spoken_language_identification.dart](./bin/spoken_language_identification.dart) | Use a whisper model for spoken language identification. See also [./run-whisper.sh](./run-whisper.sh). |
```
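For orientation, here is a condensed sketch of what the example does (the model and WAV paths follow what [./run-whisper.sh](./run-whisper.sh) downloads; argument parsing and error handling are omitted, and the native library must already be loaded, e.g. via `initSherpaOnnx()` from `bin/init.dart`):

```dart
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  // Assumes sherpa_onnx.initBindings(...) has already loaded the
  // native library (see bin/init.dart below).
  final config = sherpa_onnx.SpokenLanguageIdentificationConfig(
    whisper: sherpa_onnx.SpokenLanguageIdentificationWhisperConfig(
      encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
    ),
  );

  final slid = sherpa_onnx.SpokenLanguageIdentification(config);
  final stream = slid.createStream();

  final wave = sherpa_onnx.readWave('./en-english.wav');
  stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);

  print(slid.compute(stream).lang); // a language code such as 'en'

  stream.free();
  slid.free();
}
```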
**analysis_options.yaml** (new file)

```yaml
include: package:lints/recommended.yaml

analyzer:
  language:
    strict-casts: true
    strict-inference: true
    strict-raw-types: true

linter:
  rules:
    - always_use_package_imports
    - avoid_dynamic_calls
    - cancel_subscriptions
    - close_sinks
    - unawaited_futures
    - use_super_parameters
```
**bin/init.dart** (new file)

```dart
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:isolate';
import 'package:path/path.dart' as p;
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

Future<void> initSherpaOnnx() async {
  String platform = '';

  if (Platform.isMacOS) {
    platform = 'macos';
  } else if (Platform.isLinux) {
    platform = 'linux';
  } else if (Platform.isWindows) {
    platform = 'windows';
  } else {
    throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}');
  }

  var uri = await Isolate.resolvePackageUri(
      Uri.parse('package:sherpa_onnx_$platform/any_path_is_ok_here.dart'));

  if (uri == null) {
    print('File not found');
    exit(1);
  }

  var libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform);
  if (platform == 'linux') {
    final arch = Platform.version.contains('arm64') ||
            Platform.version.contains('aarch64')
        ? 'aarch64'
        : 'x64';
    libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform, arch);
  }

  sherpa_onnx.initBindings(libPath);
}
```
**bin/spoken_language_identification.dart** (new file)

```dart
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';

void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the whisper encoder model')
    ..addOption('decoder', help: 'Path to the whisper decoder model')
    ..addOption('tail-paddings',
        help: 'Tail paddings for the whisper model', defaultsTo: '0')
    ..addOption('wav', help: 'Path to test.wav for language identification')
    ..addFlag('help',
        abbr: 'h', help: 'Show this help message', negatable: false);

  final res = parser.parse(arguments);
  if (res['help'] as bool) {
    print(parser.usage);
    exit(0);
  }

  if (res['encoder'] == null || res['decoder'] == null || res['wav'] == null) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tailPaddings = int.tryParse(res['tail-paddings'] as String) ?? 0;
  final wav = res['wav'] as String;

  final whisperConfig = sherpa_onnx.SpokenLanguageIdentificationWhisperConfig(
    encoder: encoder,
    decoder: decoder,
    tailPaddings: tailPaddings,
  );

  final config = sherpa_onnx.SpokenLanguageIdentificationConfig(
    whisper: whisperConfig,
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  );

  final slid = sherpa_onnx.SpokenLanguageIdentification(config);

  final waveData = sherpa_onnx.readWave(wav);

  final stream = slid.createStream();
  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);

  final result = slid.compute(stream);

  print('File: $wav');
  print('Detected language: ${result.lang}');

  stream.free();
  slid.free();
}
```
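A possible hardening of the example above (not part of this PR): since `free()` must be called explicitly on the native wrappers, guarding the calls with `try`/`finally` keeps resources released even when `compute()` throws. A minimal sketch, using the API exactly as defined in this PR:

```dart
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

/// Returns the detected language of [wavPath]; the stream is freed even if
/// compute() throws. The caller still owns (and must free) [slid].
String identifyLanguage(
    sherpa_onnx.SpokenLanguageIdentification slid, String wavPath) {
  final wave = sherpa_onnx.readWave(wavPath);
  final stream = slid.createStream();
  try {
    stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
    return slid.compute(stream).lang;
  } finally {
    stream.free(); // the native offline stream is not garbage collected
  }
}
```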
**pubspec.yaml** (new file)

```yaml
name: spoken_language_identification

description: >
  This example demonstrates how to use the Dart API for spoken language
  identification.

version: 1.0.0

environment:
  sdk: ">=3.0.0 <4.0.0"

# Add regular dependencies here.
dependencies:
  sherpa_onnx: ^1.12.13
  # sherpa_onnx:
  #   path: ../../flutter/sherpa_onnx
  path: ^1.9.0
  args: ^2.5.0

dev_dependencies:
  lints: ^3.0.0
```
**run-whisper.sh** (new file)

```bash
#!/usr/bin/env bash

set -ex

dart pub get

if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

# Download test WAV files
waves=(
# ar-arabic.wav
# bg-bulgarian.wav
# cs-czech.wav
# da-danish.wav
# de-german.wav
# el-greek.wav
en-english.wav
es-spanish.wav
# fa-persian.wav
# fi-finnish.wav
# fr-french.wav
# hi-hindi.wav
# hr-croatian.wav
# id-indonesian.wav
# it-italian.wav
# ja-japanese.wav
# ko-korean.wav
# nl-dutch.wav
# no-norwegian.wav
# pl-polish.wav
# pt-portuguese.wav
# ro-romanian.wav
ru-russian.wav
# sk-slovak.wav
# sv-swedish.wav
# ta-tamil.wav
# tl-tagalog.wav
# tr-turkish.wav
# uk-ukrainian.wav
zh-chinese.wav
)

for wav in "${waves[@]}"; do
  if [ ! -f ./$wav ]; then
    echo "Downloading $wav"
    curl -SL -O https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/resolve/main/test_wavs/$wav
  fi

  echo "Testing $wav"
  dart run \
    ./bin/spoken_language_identification.dart \
    --encoder ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx \
    --decoder ./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx \
    --wav ./$wav

  echo "----------------------------------------"
done
```
**lib/sherpa_onnx.dart**

```diff
@@ -15,6 +15,7 @@ export 'src/online_punctuation.dart';
 export 'src/online_recognizer.dart';
 export 'src/online_stream.dart';
 export 'src/speaker_identification.dart';
+export 'src/spoken_language_identification.dart';
 export 'src/tts.dart';
 export 'src/vad.dart';
 export 'src/version.dart';
```
**lib/src/sherpa_onnx_bindings.dart**

```diff
@@ -626,6 +626,32 @@ final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {}
 
 final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {}
 
+final class SherpaOnnxSpokenLanguageIdentificationWhisperConfig extends Struct {
+  external Pointer<Utf8> encoder;
+  external Pointer<Utf8> decoder;
+
+  @Int32()
+  external int tailPaddings;
+}
+
+final class SherpaOnnxSpokenLanguageIdentificationConfig extends Struct {
+  external SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
+
+  @Int32()
+  external int numThreads;
+
+  @Int32()
+  external int debug;
+
+  external Pointer<Utf8> provider;
+}
+
+final class SherpaOnnxSpokenLanguageIdentificationResult extends Struct {
+  external Pointer<Utf8> lang;
+}
+
+final class SherpaOnnxSpokenLanguageIdentification extends Opaque {}
+
 final class SherpaOnnxOfflineSpeechDenoiser extends Opaque {}
 
 typedef SherpaOnnxCreateOfflineSpeechDenoiserNative
@@ -661,6 +687,40 @@ typedef SherpaOnnxDestroyDenoisedAudioNative = Void Function(
 typedef SherpaOnnxDestroyDenoisedAudio = void Function(
     Pointer<SherpaOnnxDenoisedAudio>);
 
+typedef SherpaOnnxCreateSpokenLanguageIdentificationNative
+    = Pointer<SherpaOnnxSpokenLanguageIdentification> Function(
+        Pointer<SherpaOnnxSpokenLanguageIdentificationConfig>);
+
+typedef SherpaOnnxCreateSpokenLanguageIdentification
+    = SherpaOnnxCreateSpokenLanguageIdentificationNative;
+
+typedef SherpaOnnxDestroySpokenLanguageIdentificationNative = Void Function(
+    Pointer<SherpaOnnxSpokenLanguageIdentification>);
+
+typedef SherpaOnnxDestroySpokenLanguageIdentification = void Function(
+    Pointer<SherpaOnnxSpokenLanguageIdentification>);
+
+typedef SherpaOnnxSpokenLanguageIdentificationCreateOfflineStreamNative
+    = Pointer<SherpaOnnxOfflineStream> Function(
+        Pointer<SherpaOnnxSpokenLanguageIdentification>);
+
+typedef SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream
+    = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStreamNative;
+
+typedef SherpaOnnxSpokenLanguageIdentificationComputeNative
+    = Pointer<SherpaOnnxSpokenLanguageIdentificationResult> Function(
+        Pointer<SherpaOnnxSpokenLanguageIdentification>,
+        Pointer<SherpaOnnxOfflineStream>);
+
+typedef SherpaOnnxSpokenLanguageIdentificationCompute
+    = SherpaOnnxSpokenLanguageIdentificationComputeNative;
+
+typedef SherpaOnnxDestroySpokenLanguageIdentificationResultNative = Void
+    Function(Pointer<SherpaOnnxSpokenLanguageIdentificationResult>);
+
+typedef SherpaOnnxDestroySpokenLanguageIdentificationResult = void Function(
+    Pointer<SherpaOnnxSpokenLanguageIdentificationResult>);
+
 typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative
     = Pointer<SherpaOnnxOfflineSpeakerDiarization> Function(
         Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
@@ -1344,6 +1404,17 @@ class SherpaOnnxBindings {
   static SherpaOnnxOfflineSpeechDenoiserRun? sherpaOnnxOfflineSpeechDenoiserRun;
   static SherpaOnnxDestroyDenoisedAudio? sherpaOnnxDestroyDenoisedAudio;
 
+  static SherpaOnnxCreateSpokenLanguageIdentification?
+      sherpaOnnxCreateSpokenLanguageIdentification;
+  static SherpaOnnxDestroySpokenLanguageIdentification?
+      sherpaOnnxDestroySpokenLanguageIdentification;
+  static SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream?
+      sherpaOnnxSpokenLanguageIdentificationCreateOfflineStream;
+  static SherpaOnnxSpokenLanguageIdentificationCompute?
+      sherpaOnnxSpokenLanguageIdentificationCompute;
+  static SherpaOnnxDestroySpokenLanguageIdentificationResult?
+      sherpaOnnxDestroySpokenLanguageIdentificationResult;
+
   static SherpaOnnxCreateOfflineSpeakerDiarization?
       sherpaOnnxCreateOfflineSpeakerDiarization;
   static SherpaOnnxDestroyOfflineSpeakerDiarization?
@@ -1574,6 +1645,41 @@
             'SherpaOnnxDestroyDenoisedAudio')
         .asFunction();
 
+    sherpaOnnxCreateSpokenLanguageIdentification ??= dynamicLibrary
+        .lookup<
+                NativeFunction<
+                    SherpaOnnxCreateSpokenLanguageIdentificationNative>>(
+            'SherpaOnnxCreateSpokenLanguageIdentification')
+        .asFunction();
+
+    sherpaOnnxDestroySpokenLanguageIdentification ??= dynamicLibrary
+        .lookup<
+                NativeFunction<
+                    SherpaOnnxDestroySpokenLanguageIdentificationNative>>(
+            'SherpaOnnxDestroySpokenLanguageIdentification')
+        .asFunction();
+
+    sherpaOnnxSpokenLanguageIdentificationCreateOfflineStream ??= dynamicLibrary
+        .lookup<
+                NativeFunction<
+                    SherpaOnnxSpokenLanguageIdentificationCreateOfflineStreamNative>>(
+            'SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream')
+        .asFunction();
+
+    sherpaOnnxSpokenLanguageIdentificationCompute ??= dynamicLibrary
+        .lookup<
+                NativeFunction<
+                    SherpaOnnxSpokenLanguageIdentificationComputeNative>>(
+            'SherpaOnnxSpokenLanguageIdentificationCompute')
+        .asFunction();
+
+    sherpaOnnxDestroySpokenLanguageIdentificationResult ??= dynamicLibrary
+        .lookup<
+                NativeFunction<
+                    SherpaOnnxDestroySpokenLanguageIdentificationResultNative>>(
+            'SherpaOnnxDestroySpokenLanguageIdentificationResult')
+        .asFunction();
+
     sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary
         .lookup<
             NativeFunction<
```
**lib/src/spoken_language_identification.dart** (new file)

```dart
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:ffi';

import 'package:ffi/ffi.dart';

import './offline_stream.dart';
import './sherpa_onnx_bindings.dart';
import './utils.dart';

class SpokenLanguageIdentificationWhisperConfig {
  const SpokenLanguageIdentificationWhisperConfig({
    this.encoder = '',
    this.decoder = '',
    this.tailPaddings = 0,
  });

  factory SpokenLanguageIdentificationWhisperConfig.fromJson(
      Map<String, dynamic> json) {
    return SpokenLanguageIdentificationWhisperConfig(
      encoder: json['encoder'] as String? ?? '',
      decoder: json['decoder'] as String? ?? '',
      tailPaddings: json['tailPaddings'] as int? ?? 0,
    );
  }

  @override
  String toString() {
    return 'SpokenLanguageIdentificationWhisperConfig(encoder: $encoder, decoder: $decoder, tailPaddings: $tailPaddings)';
  }

  Map<String, dynamic> toJson() => {
        'encoder': encoder,
        'decoder': decoder,
        'tailPaddings': tailPaddings,
      };

  final String encoder;
  final String decoder;
  final int tailPaddings;
}

class SpokenLanguageIdentificationConfig {
  const SpokenLanguageIdentificationConfig({
    this.whisper = const SpokenLanguageIdentificationWhisperConfig(),
    this.numThreads = 1,
    this.debug = false,
    this.provider = 'cpu',
  });

  factory SpokenLanguageIdentificationConfig.fromJson(
      Map<String, dynamic> json) {
    return SpokenLanguageIdentificationConfig(
      whisper: json['whisper'] != null
          ? SpokenLanguageIdentificationWhisperConfig.fromJson(
              json['whisper'] as Map<String, dynamic>)
          : const SpokenLanguageIdentificationWhisperConfig(),
      numThreads: json['numThreads'] as int? ?? 1,
      debug: json['debug'] as bool? ?? false,
      provider: json['provider'] as String? ?? 'cpu',
    );
  }

  @override
  String toString() {
    return 'SpokenLanguageIdentificationConfig(whisper: $whisper, numThreads: $numThreads, debug: $debug, provider: $provider)';
  }

  Map<String, dynamic> toJson() => {
        'whisper': whisper.toJson(),
        'numThreads': numThreads,
        'debug': debug,
        'provider': provider,
      };

  final SpokenLanguageIdentificationWhisperConfig whisper;
  final int numThreads;
  final bool debug;
  final String provider;
}

class SpokenLanguageIdentificationResult {
  const SpokenLanguageIdentificationResult({
    required this.lang,
  });

  factory SpokenLanguageIdentificationResult.fromJson(
      Map<String, dynamic> json) {
    return SpokenLanguageIdentificationResult(
      lang: json['lang'] as String? ?? '',
    );
  }

  @override
  String toString() {
    return 'SpokenLanguageIdentificationResult(lang: $lang)';
  }

  Map<String, dynamic> toJson() => {
        'lang': lang,
      };

  final String lang;
}

class SpokenLanguageIdentification {
  SpokenLanguageIdentification.fromPtr(
      {required this.ptr, required this.config});

  SpokenLanguageIdentification._({required this.ptr, required this.config});

  void free() {
    SherpaOnnxBindings.sherpaOnnxDestroySpokenLanguageIdentification?.call(ptr);
    ptr = nullptr;
  }

  /// The user is responsible for calling SpokenLanguageIdentification.free()
  /// on the returned instance to avoid a memory leak.
  factory SpokenLanguageIdentification(
      SpokenLanguageIdentificationConfig config) {
    final c = convertConfig(config);

    if (SherpaOnnxBindings.sherpaOnnxCreateSpokenLanguageIdentification ==
        null) {
      freeConfig(c);
      throw Exception("Please initialize sherpa-onnx first");
    }

    final ptr = SherpaOnnxBindings.sherpaOnnxCreateSpokenLanguageIdentification
            ?.call(c) ??
        nullptr;

    if (ptr == nullptr) {
      freeConfig(c);
      throw Exception(
          "Failed to create spoken language identification. Please check your config");
    }

    freeConfig(c);

    return SpokenLanguageIdentification._(ptr: ptr, config: config);
  }

  static Pointer<SherpaOnnxSpokenLanguageIdentificationConfig> convertConfig(
      SpokenLanguageIdentificationConfig config) {
    final c = calloc<SherpaOnnxSpokenLanguageIdentificationConfig>();

    c.ref.whisper.encoder = config.whisper.encoder.toNativeUtf8();
    c.ref.whisper.decoder = config.whisper.decoder.toNativeUtf8();
    c.ref.whisper.tailPaddings = config.whisper.tailPaddings;

    c.ref.numThreads = config.numThreads;
    c.ref.debug = config.debug ? 1 : 0;
    c.ref.provider = config.provider.toNativeUtf8();

    return c;
  }

  static void freeConfig(
      Pointer<SherpaOnnxSpokenLanguageIdentificationConfig> c) {
    malloc.free(c.ref.whisper.encoder);
    malloc.free(c.ref.whisper.decoder);
    malloc.free(c.ref.provider);
    malloc.free(c);
  }

  /// The user has to invoke stream.free() on the returned instance
  /// to avoid a memory leak.
  OfflineStream createStream() {
    final p = SherpaOnnxBindings
            .sherpaOnnxSpokenLanguageIdentificationCreateOfflineStream
            ?.call(ptr) ??
        nullptr;
    return OfflineStream(ptr: p);
  }

  SpokenLanguageIdentificationResult compute(OfflineStream stream) {
    final result = SherpaOnnxBindings
            .sherpaOnnxSpokenLanguageIdentificationCompute
            ?.call(ptr, stream.ptr) ??
        nullptr;

    if (result == nullptr) {
      return const SpokenLanguageIdentificationResult(lang: '');
    }

    final lang = toDartString(result.ref.lang);

    SherpaOnnxBindings.sherpaOnnxDestroySpokenLanguageIdentificationResult
        ?.call(result);

    return SpokenLanguageIdentificationResult(lang: lang);
  }

  Pointer<SherpaOnnxSpokenLanguageIdentification> ptr;
  SpokenLanguageIdentificationConfig config;
}
```
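Since the config classes come with `fromJson`/`toJson`, a configuration can be round-tripped through a plain map, e.g. one read from an app settings file. A minimal sketch; the paths and values below are placeholders:

```dart
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  // Placeholder values; the keys match those consumed by fromJson().
  final json = <String, dynamic>{
    'whisper': <String, dynamic>{
      'encoder': './tiny-encoder.int8.onnx',
      'decoder': './tiny-decoder.int8.onnx',
      'tailPaddings': 300,
    },
    'numThreads': 2,
  };

  final config =
      sherpa_onnx.SpokenLanguageIdentificationConfig.fromJson(json);

  // Omitted keys fall back to the defaults (provider: 'cpu', debug: false).
  print(config);

  // toJson() emits the same key names that fromJson() consumes.
  assert(config.toJson()['numThreads'] == 2);
}
```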