正在显示
21 个修改的文件
包含
733 行增加
和
17 行删除
| @@ -4,6 +4,11 @@ set -ex | @@ -4,6 +4,11 @@ set -ex | ||
| 4 | 4 | ||
| 5 | cd dart-api-examples | 5 | cd dart-api-examples |
| 6 | 6 | ||
| 7 | +pushd speaker-diarization | ||
| 8 | +echo '----------speaker diarization----------' | ||
| 9 | +./run.sh | ||
| 10 | +popd | ||
| 11 | + | ||
| 7 | pushd speaker-identification | 12 | pushd speaker-identification |
| 8 | echo '----------3d speaker----------' | 13 | echo '----------3d speaker----------' |
| 9 | ./run-3d-speaker.sh | 14 | ./run-3d-speaker.sh |
| @@ -114,6 +114,7 @@ jobs: | @@ -114,6 +114,7 @@ jobs: | ||
| 114 | cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml | 114 | cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml |
| 115 | cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml | 115 | cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml |
| 116 | cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml | 116 | cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml |
| 117 | + cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml | ||
| 117 | 118 | ||
| 118 | cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml | 119 | cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml |
| 119 | 120 |
| @@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx | @@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx | ||
| 9 | 9 | ||
| 10 | | Directory | Description | | 10 | | Directory | Description | |
| 11 | |-----------|-------------| | 11 | |-----------|-------------| |
| 12 | +| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.| | ||
| 12 | | [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.| | 13 | | [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.| |
| 13 | | [./audio-tagging](./audio-tagging)| Example for audio tagging.| | 14 | | [./audio-tagging](./audio-tagging)| Example for audio tagging.| |
| 14 | | [./keyword-spotter](./keyword-spotter)| Example for keyword spotting| | 15 | | [./keyword-spotter](./keyword-spotter)| Example for keyword spotting| |
| 1 | +# This file configures the static analysis results for your project (errors, | ||
| 2 | +# warnings, and lints). | ||
| 3 | +# | ||
| 4 | +# This enables the 'recommended' set of lints from `package:lints`. | ||
| 5 | +# This set helps identify many issues that may lead to problems when running | ||
| 6 | +# or consuming Dart code, and enforces writing Dart using a single, idiomatic | ||
| 7 | +# style and format. | ||
| 8 | +# | ||
| 9 | +# If you want a smaller set of lints you can change this to specify | ||
| 10 | +# 'package:lints/core.yaml'. These are just the most critical lints | ||
| 11 | +# (the recommended set includes the core lints). | ||
| 12 | +# The core lints are also what is used by pub.dev for scoring packages. | ||
| 13 | + | ||
| 14 | +include: package:lints/recommended.yaml | ||
| 15 | + | ||
| 16 | +# Uncomment the following section to specify additional rules. | ||
| 17 | + | ||
| 18 | +# linter: | ||
| 19 | +# rules: | ||
| 20 | +# - camel_case_types | ||
| 21 | + | ||
| 22 | +# analyzer: | ||
| 23 | +# exclude: | ||
| 24 | +# - path/to/excluded/files/** | ||
| 25 | + | ||
| 26 | +# For more information about the core and recommended set of lints, see | ||
| 27 | +# https://dart.dev/go/core-lints | ||
| 28 | + | ||
| 29 | +# For additional information about configuring this file, see | ||
| 30 | +# https://dart.dev/guides/language/analysis-options |
| 1 | +../../vad/bin/init.dart |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | +import 'dart:typed_data'; | ||
| 4 | +import 'dart:ffi'; | ||
| 5 | + | ||
| 6 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 7 | +import './init.dart'; | ||
| 8 | + | ||
| 9 | +void main(List<String> arguments) async { | ||
| 10 | + await initSherpaOnnx(); | ||
| 11 | + | ||
| 12 | + /* Please use the following commands to download files used in this file | ||
| 13 | + Step 1: Download a speaker segmentation model | ||
| 14 | + | ||
| 15 | + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models | ||
| 16 | + for a list of available models. The following is an example | ||
| 17 | + | ||
| 18 | + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 19 | + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 20 | + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 21 | + | ||
| 22 | + Step 2: Download a speaker embedding extractor model | ||
| 23 | + | ||
| 24 | + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models | ||
| 25 | + for a list of available models. The following is an example | ||
| 26 | + | ||
| 27 | + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | ||
| 28 | + | ||
| 29 | + Step 3. Download test wave files | ||
| 30 | + | ||
| 31 | + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models | ||
| 32 | + for a list of available test wave files. The following is an example | ||
| 33 | + | ||
| 34 | + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav | ||
| 35 | + | ||
| 36 | + Step 4. Run it | ||
| 37 | + */ | ||
| 38 | + | ||
| 39 | + final segmentationModel = | ||
| 40 | + "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"; | ||
| 41 | + | ||
| 42 | + final embeddingModel = | ||
| 43 | + "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"; | ||
| 44 | + | ||
| 45 | + final waveFilename = "./0-four-speakers-zh.wav"; | ||
| 46 | + | ||
| 47 | + final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig( | ||
| 48 | + pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig( | ||
| 49 | + model: segmentationModel), | ||
| 50 | + ); | ||
| 51 | + | ||
| 52 | + final embeddingConfig = | ||
| 53 | + sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel); | ||
| 54 | + | ||
| 55 | + // Since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set | ||
| 56 | + // numClusters to 4. If you don't know the exact number, please set it to -1. | ||
| 57 | + // In that case, you have to set threshold. A larger threshold leads to | ||
| 58 | + // fewer clusters, i.e., fewer speakers. | ||
| 59 | + final clusteringConfig = | ||
| 60 | + sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5); | ||
| 61 | + | ||
| 62 | + var config = sherpa_onnx.OfflineSpeakerDiarizationConfig( | ||
| 63 | + segmentation: segmentationConfig, | ||
| 64 | + embedding: embeddingConfig, | ||
| 65 | + clustering: clusteringConfig, | ||
| 66 | + minDurationOn: 0.2, | ||
| 67 | + minDurationOff: 0.5); | ||
| 68 | + | ||
| 69 | + final sd = sherpa_onnx.OfflineSpeakerDiarization(config); | ||
| 70 | + if (sd.ptr == nullptr) { | ||
| 71 | + return; | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + final waveData = sherpa_onnx.readWave(waveFilename); | ||
| 75 | + if (sd.sampleRate != waveData.sampleRate) { | ||
| 76 | + print( | ||
| 77 | + 'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}'); | ||
| 78 | + sd.free(); // release the native object before bailing out | ||
| 79 | + return; | ||
| 80 | + } | ||
| 81 | + | ||
| 82 | + print('started'); | ||
| 83 | + | ||
| 84 | + // Use the following statement if you don't want to use a callback | ||
| 85 | + // final segments = sd.process(samples: waveData.samples); | ||
| 86 | + | ||
| 87 | + final segments = sd.processWithCallback( | ||
| 88 | + samples: waveData.samples, | ||
| 89 | + callback: (int numProcessedChunk, int numTotalChunks) { | ||
| 90 | + final progress = 100.0 * numProcessedChunk / numTotalChunks; | ||
| 91 | + | ||
| 92 | + print('Progress ${progress.toStringAsFixed(2)}%'); | ||
| 93 | + | ||
| 94 | + return 0; | ||
| 95 | + }); | ||
| 96 | + | ||
| 97 | + for (int i = 0; i < segments.length; ++i) { | ||
| 98 | + print( | ||
| 99 | + '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)} speaker_${segments[i].speaker}'); | ||
| 100 | + } | ||
| 101 | + | ||
| 102 | + sd.free(); // the caller owns the diarization object and must free it | ||
| 103 | +} |
| 1 | +name: speaker_diarization | ||
| 2 | +description: > | ||
| 3 | + This example demonstrates how to use the Dart API for speaker diarization. | ||
| 4 | + | ||
| 5 | +version: 1.0.0 | ||
| 6 | + | ||
| 7 | +environment: | ||
| 8 | + sdk: ">=3.0.0 <4.0.0" | ||
| 9 | + | ||
| 10 | +dependencies: | ||
| 11 | + sherpa_onnx: ^1.10.27 | ||
| 12 | + # sherpa_onnx: | ||
| 13 | + # path: ../../flutter/sherpa_onnx | ||
| 14 | + path: ^1.9.0 | ||
| 15 | + | ||
| 16 | +dev_dependencies: | ||
| 17 | + lints: ^3.0.0 |
dart-api-examples/speaker-diarization/run.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then | ||
| 8 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 9 | + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 10 | + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 | ||
| 11 | +fi | ||
| 12 | + | ||
| 13 | +if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then | ||
| 14 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx | ||
| 15 | +fi | ||
| 16 | + | ||
| 17 | +if [ ! -f ./0-four-speakers-zh.wav ]; then | ||
| 18 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav | ||
| 19 | +fi | ||
| 20 | + | ||
| 21 | +dart run ./bin/speaker-diarization.dart |
| @@ -11,6 +11,7 @@ | @@ -11,6 +11,7 @@ | ||
| 11 | 11 | ||
| 12 | | Functions | URL | Supported Platforms| | 12 | | Functions | URL | Supported Platforms| |
| 13 | |---|---|---| | 13 | |---|---|---| |
| 14 | +|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux| | ||
| 14 | |Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux| | 15 | |Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux| |
| 15 | |Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux| | 16 | |Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux| |
| 16 | |Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux| | 17 | |Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux| |
| @@ -6,6 +6,7 @@ export 'src/audio_tagging.dart'; | @@ -6,6 +6,7 @@ export 'src/audio_tagging.dart'; | ||
| 6 | export 'src/feature_config.dart'; | 6 | export 'src/feature_config.dart'; |
| 7 | export 'src/keyword_spotter.dart'; | 7 | export 'src/keyword_spotter.dart'; |
| 8 | export 'src/offline_recognizer.dart'; | 8 | export 'src/offline_recognizer.dart'; |
| 9 | +export 'src/offline_speaker_diarization.dart'; | ||
| 9 | export 'src/offline_stream.dart'; | 10 | export 'src/offline_stream.dart'; |
| 10 | export 'src/online_recognizer.dart'; | 11 | export 'src/online_recognizer.dart'; |
| 11 | export 'src/online_stream.dart'; | 12 | export 'src/online_stream.dart'; |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:ffi'; | ||
| 3 | +import 'dart:typed_data'; | ||
| 4 | + | ||
| 5 | +import 'package:ffi/ffi.dart'; | ||
| 6 | + | ||
| 7 | +import './sherpa_onnx_bindings.dart'; | ||
| 8 | +import './speaker_identification.dart'; | ||
| 9 | + | ||
| 10 | +class OfflineSpeakerDiarizationSegment { | ||
| 11 | + const OfflineSpeakerDiarizationSegment({ | ||
| 12 | + required this.start, | ||
| 13 | + required this.end, | ||
| 14 | + required this.speaker, | ||
| 15 | + }); | ||
| 16 | + | ||
| 17 | + @override | ||
| 18 | + String toString() { | ||
| 19 | + return 'OfflineSpeakerDiarizationSegment(start: $start, end: $end, speaker: $speaker)'; | ||
| 20 | + } | ||
| 21 | + | ||
| 22 | + final double start; | ||
| 23 | + final double end; | ||
| 24 | + final int speaker; | ||
| 25 | +} | ||
| 26 | + | ||
| 27 | +class OfflineSpeakerSegmentationPyannoteModelConfig { | ||
| 28 | + const OfflineSpeakerSegmentationPyannoteModelConfig({ | ||
| 29 | + this.model = '', | ||
| 30 | + }); | ||
| 31 | + | ||
| 32 | + @override | ||
| 33 | + String toString() { | ||
| 34 | + return 'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)'; | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + final String model; | ||
| 38 | +} | ||
| 39 | + | ||
| 40 | +class OfflineSpeakerSegmentationModelConfig { | ||
| 41 | + const OfflineSpeakerSegmentationModelConfig({ | ||
| 42 | + this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(), | ||
| 43 | + this.numThreads = 1, | ||
| 44 | + this.debug = true, | ||
| 45 | + this.provider = 'cpu', | ||
| 46 | + }); | ||
| 47 | + | ||
| 48 | + @override | ||
| 49 | + String toString() { | ||
| 50 | + return 'OfflineSpeakerSegmentationModelConfig(pyannote: $pyannote, numThreads: $numThreads, debug: $debug, provider: $provider)'; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + final OfflineSpeakerSegmentationPyannoteModelConfig pyannote; | ||
| 54 | + | ||
| 55 | + final int numThreads; | ||
| 56 | + final bool debug; | ||
| 57 | + final String provider; | ||
| 58 | +} | ||
| 59 | + | ||
| 60 | +class FastClusteringConfig { | ||
| 61 | + const FastClusteringConfig({ | ||
| 62 | + this.numClusters = -1, | ||
| 63 | + this.threshold = 0.5, | ||
| 64 | + }); | ||
| 65 | + | ||
| 66 | + @override | ||
| 67 | + String toString() { | ||
| 68 | + return 'FastClusteringConfig(numClusters: $numClusters, threshold: $threshold)'; | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + final int numClusters; | ||
| 72 | + final double threshold; | ||
| 73 | +} | ||
| 74 | + | ||
| 75 | +class OfflineSpeakerDiarizationConfig { | ||
| 76 | + const OfflineSpeakerDiarizationConfig({ | ||
| 77 | + this.segmentation = const OfflineSpeakerSegmentationModelConfig(), | ||
| 78 | + this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''), | ||
| 79 | + this.clustering = const FastClusteringConfig(), | ||
| 80 | + this.minDurationOn = 0.2, | ||
| 81 | + this.minDurationOff = 0.5, | ||
| 82 | + }); | ||
| 83 | + | ||
| 84 | + @override | ||
| 85 | + String toString() { | ||
| 86 | + return 'OfflineSpeakerDiarizationConfig(segmentation: $segmentation, embedding: $embedding, clustering: $clustering, minDurationOn: $minDurationOn, minDurationOff: $minDurationOff)'; | ||
| 87 | + } | ||
| 88 | + | ||
| 89 | + final OfflineSpeakerSegmentationModelConfig segmentation; | ||
| 90 | + final SpeakerEmbeddingExtractorConfig embedding; | ||
| 91 | + final FastClusteringConfig clustering; | ||
| 92 | + final double minDurationOff; // in seconds | ||
| 93 | + final double minDurationOn; // in seconds | ||
| 94 | +} | ||
| 95 | + | ||
| 96 | +class OfflineSpeakerDiarization { | ||
| 97 | + OfflineSpeakerDiarization._( | ||
| 98 | + {required this.ptr, required this.config, required this.sampleRate}); | ||
| 99 | + | ||
| 100 | + void free() { | ||
| 101 | + SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr); | ||
| 102 | + ptr = nullptr; | ||
| 103 | + } | ||
| 104 | + | ||
| 105 | + /// The user is responsible for calling OfflineSpeakerDiarization.free() | ||
| 106 | + /// on the returned instance to avoid a memory leak. | ||
| 107 | + factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) { | ||
| 108 | + final c = calloc<SherpaOnnxOfflineSpeakerDiarizationConfig>(); | ||
| 109 | + | ||
| 110 | + c.ref.segmentation.pyannote.model = | ||
| 111 | + config.segmentation.pyannote.model.toNativeUtf8(); | ||
| 112 | + c.ref.segmentation.numThreads = config.segmentation.numThreads; | ||
| 113 | + c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0; | ||
| 114 | + c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8(); | ||
| 115 | + | ||
| 116 | + c.ref.embedding.model = config.embedding.model.toNativeUtf8(); | ||
| 117 | + c.ref.embedding.numThreads = config.embedding.numThreads; | ||
| 118 | + c.ref.embedding.debug = config.embedding.debug ? 1 : 0; | ||
| 119 | + c.ref.embedding.provider = config.embedding.provider.toNativeUtf8(); | ||
| 120 | + | ||
| 121 | + c.ref.clustering.numClusters = config.clustering.numClusters; | ||
| 122 | + c.ref.clustering.threshold = config.clustering.threshold; | ||
| 123 | + | ||
| 124 | + c.ref.minDurationOn = config.minDurationOn; | ||
| 125 | + c.ref.minDurationOff = config.minDurationOff; | ||
| 126 | + | ||
| 127 | + final ptr = | ||
| 128 | + SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ?? | ||
| 129 | + nullptr; | ||
| 130 | + | ||
| 131 | + calloc.free(c.ref.embedding.provider); | ||
| 132 | + calloc.free(c.ref.embedding.model); | ||
| 133 | + calloc.free(c.ref.segmentation.provider); | ||
| 134 | + calloc.free(c.ref.segmentation.pyannote.model); | ||
| 135 | + calloc.free(c); // free the struct last: the lines above still read c.ref | ||
| 136 | + int sampleRate = 0; | ||
| 137 | + if (ptr != nullptr) { | ||
| 138 | + sampleRate = SherpaOnnxBindings | ||
| 139 | + .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate | ||
| 140 | + ?.call(ptr) ?? | ||
| 141 | + 0; | ||
| 142 | + } | ||
| 143 | + return OfflineSpeakerDiarization._( | ||
| 144 | + ptr: ptr, config: config, sampleRate: sampleRate); | ||
| 145 | + } | ||
| 146 | + | ||
| 147 | + List<OfflineSpeakerDiarizationSegment> process( | ||
| 148 | + {required Float32List samples}) { | ||
| 149 | + if (ptr == nullptr) { | ||
| 150 | + return <OfflineSpeakerDiarizationSegment>[]; | ||
| 151 | + } | ||
| 152 | + | ||
| 153 | + final n = samples.length; | ||
| 154 | + final Pointer<Float> p = calloc<Float>(n); | ||
| 155 | + | ||
| 156 | + final pList = p.asTypedList(n); | ||
| 157 | + pList.setAll(0, samples); | ||
| 158 | + | ||
| 159 | + final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess | ||
| 160 | + ?.call(ptr, p, n) ?? | ||
| 161 | + nullptr; | ||
| 162 | + calloc.free(p); // the native copy of samples is only needed for the call | ||
| 163 | + final ans = _processImpl(r); | ||
| 164 | + | ||
| 165 | + SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult | ||
| 166 | + ?.call(r); | ||
| 167 | + | ||
| 168 | + return ans; | ||
| 169 | + } | ||
| 170 | + | ||
| 171 | + List<OfflineSpeakerDiarizationSegment> processWithCallback({ | ||
| 172 | + required Float32List samples, | ||
| 173 | + required int Function(int numProcessedChunks, int numTotalChunks) callback, | ||
| 174 | + }) { | ||
| 175 | + if (ptr == nullptr) { | ||
| 176 | + return <OfflineSpeakerDiarizationSegment>[]; | ||
| 177 | + } | ||
| 178 | + | ||
| 179 | + final n = samples.length; | ||
| 180 | + final Pointer<Float> p = calloc<Float>(n); | ||
| 181 | + | ||
| 182 | + final pList = p.asTypedList(n); | ||
| 183 | + pList.setAll(0, samples); | ||
| 184 | + | ||
| 185 | + final wrapper = NativeCallable< | ||
| 186 | + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal( | ||
| 187 | + (int numProcessedChunks, int numTotalChunks) { | ||
| 188 | + return callback(numProcessedChunks, numTotalChunks); | ||
| 189 | + }, exceptionalReturn: 0); | ||
| 190 | + | ||
| 191 | + final r = SherpaOnnxBindings | ||
| 192 | + .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg | ||
| 193 | + ?.call(ptr, p, n, wrapper.nativeFunction) ?? | ||
| 194 | + nullptr; | ||
| 195 | + | ||
| 196 | + wrapper.close(); | ||
| 197 | + calloc.free(p); // the native copy of samples is only needed for the call | ||
| 198 | + final ans = _processImpl(r); | ||
| 199 | + | ||
| 200 | + SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult | ||
| 201 | + ?.call(r); | ||
| 202 | + | ||
| 203 | + return ans; | ||
| 204 | + } | ||
| 205 | + | ||
| 206 | + List<OfflineSpeakerDiarizationSegment> _processImpl( | ||
| 207 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) { | ||
| 208 | + if (r == nullptr) { | ||
| 209 | + return <OfflineSpeakerDiarizationSegment>[]; | ||
| 210 | + } | ||
| 211 | + | ||
| 212 | + final numSegments = SherpaOnnxBindings | ||
| 213 | + .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments | ||
| 214 | + ?.call(r) ?? | ||
| 215 | + 0; | ||
| 216 | + final segments = SherpaOnnxBindings | ||
| 217 | + .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime | ||
| 218 | + ?.call(r) ?? | ||
| 219 | + nullptr; | ||
| 220 | + | ||
| 221 | + if (segments == nullptr) { | ||
| 222 | + return <OfflineSpeakerDiarizationSegment>[]; | ||
| 223 | + } | ||
| 224 | + | ||
| 225 | + final ans = <OfflineSpeakerDiarizationSegment>[]; | ||
| 226 | + for (int i = 0; i != numSegments; ++i) { | ||
| 227 | + final s = segments + i; | ||
| 228 | + | ||
| 229 | + final tmp = OfflineSpeakerDiarizationSegment( | ||
| 230 | + start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker); | ||
| 231 | + ans.add(tmp); | ||
| 232 | + } | ||
| 233 | + | ||
| 234 | + SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment | ||
| 235 | + ?.call(segments); | ||
| 236 | + | ||
| 237 | + return ans; | ||
| 238 | + } | ||
| 239 | + | ||
| 240 | + Pointer<SherpaOnnxOfflineSpeakerDiarization> ptr; | ||
| 241 | + OfflineSpeakerDiarizationConfig config; | ||
| 242 | + final int sampleRate; | ||
| 243 | +} |
| @@ -2,6 +2,66 @@ | @@ -2,6 +2,66 @@ | ||
| 2 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | import 'package:ffi/ffi.dart'; | 3 | import 'package:ffi/ffi.dart'; |
| 4 | 4 | ||
| 5 | +final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { | ||
| 6 | + external Pointer<Utf8> model; | ||
| 7 | + | ||
| 8 | + @Int32() | ||
| 9 | + external int numThreads; | ||
| 10 | + | ||
| 11 | + @Int32() | ||
| 12 | + external int debug; | ||
| 13 | + | ||
| 14 | + external Pointer<Utf8> provider; | ||
| 15 | +} | ||
| 16 | + | ||
| 17 | +final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct { | ||
| 18 | + @Float() | ||
| 19 | + external double start; | ||
| 20 | + | ||
| 21 | + @Float() | ||
| 22 | + external double end; | ||
| 23 | + | ||
| 24 | + @Int32() | ||
| 25 | + external int speaker; | ||
| 26 | +} | ||
| 27 | + | ||
| 28 | +final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig | ||
| 29 | + extends Struct { | ||
| 30 | + external Pointer<Utf8> model; | ||
| 31 | +} | ||
| 32 | + | ||
| 33 | +final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct { | ||
| 34 | + external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote; | ||
| 35 | + | ||
| 36 | + @Int32() | ||
| 37 | + external int numThreads; | ||
| 38 | + | ||
| 39 | + @Int32() | ||
| 40 | + external int debug; | ||
| 41 | + | ||
| 42 | + external Pointer<Utf8> provider; | ||
| 43 | +} | ||
| 44 | + | ||
| 45 | +final class SherpaOnnxFastClusteringConfig extends Struct { | ||
| 46 | + @Int32() | ||
| 47 | + external int numClusters; | ||
| 48 | + | ||
| 49 | + @Float() | ||
| 50 | + external double threshold; | ||
| 51 | +} | ||
| 52 | + | ||
| 53 | +final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct { | ||
| 54 | + external SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation; | ||
| 55 | + external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding; | ||
| 56 | + external SherpaOnnxFastClusteringConfig clustering; | ||
| 57 | + | ||
| 58 | + @Float() | ||
| 59 | + external double minDurationOn; | ||
| 60 | + | ||
| 61 | + @Float() | ||
| 62 | + external double minDurationOff; | ||
| 63 | +} | ||
| 64 | + | ||
| 5 | final class SherpaOnnxOfflinePunctuationModelConfig extends Struct { | 65 | final class SherpaOnnxOfflinePunctuationModelConfig extends Struct { |
| 6 | external Pointer<Utf8> ctTransformer; | 66 | external Pointer<Utf8> ctTransformer; |
| 7 | 67 | ||
| @@ -341,18 +401,6 @@ final class SherpaOnnxWave extends Struct { | @@ -341,18 +401,6 @@ final class SherpaOnnxWave extends Struct { | ||
| 341 | external int numSamples; | 401 | external int numSamples; |
| 342 | } | 402 | } |
| 343 | 403 | ||
| 344 | -final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { | ||
| 345 | - external Pointer<Utf8> model; | ||
| 346 | - | ||
| 347 | - @Int32() | ||
| 348 | - external int numThreads; | ||
| 349 | - | ||
| 350 | - @Int32() | ||
| 351 | - external int debug; | ||
| 352 | - | ||
| 353 | - external Pointer<Utf8> provider; | ||
| 354 | -} | ||
| 355 | - | ||
| 356 | final class SherpaOnnxKeywordSpotterConfig extends Struct { | 404 | final class SherpaOnnxKeywordSpotterConfig extends Struct { |
| 357 | external SherpaOnnxFeatureConfig feat; | 405 | external SherpaOnnxFeatureConfig feat; |
| 358 | 406 | ||
| @@ -402,10 +450,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | @@ -402,10 +450,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | ||
| 402 | 450 | ||
| 403 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} | 451 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} |
| 404 | 452 | ||
| 453 | +final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {} | ||
| 454 | + | ||
| 455 | +final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {} | ||
| 456 | + | ||
| 457 | +typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative | ||
| 458 | + = Pointer<SherpaOnnxOfflineSpeakerDiarization> Function( | ||
| 459 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>); | ||
| 460 | + | ||
| 461 | +typedef SherpaOnnxCreateOfflineSpeakerDiarization | ||
| 462 | + = SherpaOnnxCreateOfflineSpeakerDiarizationNative; | ||
| 463 | + | ||
| 464 | +typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative = Void Function( | ||
| 465 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>); | ||
| 466 | + | ||
| 467 | +typedef SherpaOnnxDestroyOfflineSpeakerDiarization = void Function( | ||
| 468 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>); | ||
| 469 | + | ||
| 405 | typedef SherpaOnnxCreateOfflinePunctuationNative | 470 | typedef SherpaOnnxCreateOfflinePunctuationNative |
| 406 | = Pointer<SherpaOnnxOfflinePunctuation> Function( | 471 | = Pointer<SherpaOnnxOfflinePunctuation> Function( |
| 407 | Pointer<SherpaOnnxOfflinePunctuationConfig>); | 472 | Pointer<SherpaOnnxOfflinePunctuationConfig>); |
| 408 | 473 | ||
| 474 | +typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative = Int32 Function( | ||
| 475 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>); | ||
| 476 | + | ||
| 477 | +typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate = int Function( | ||
| 478 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>); | ||
| 479 | + | ||
| 480 | +typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative = Void Function( | ||
| 481 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>, | ||
| 482 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>); | ||
| 483 | + | ||
| 484 | +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative = Int32 | ||
| 485 | + Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>); | ||
| 486 | + | ||
| 487 | +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers = int Function( | ||
| 488 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>); | ||
| 489 | + | ||
| 490 | +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative = Int32 | ||
| 491 | + Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>); | ||
| 492 | + | ||
| 493 | +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments = int Function( | ||
| 494 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>); | ||
| 495 | + | ||
| 496 | +typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative | ||
| 497 | + = Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment> Function( | ||
| 498 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>); | ||
| 499 | + | ||
| 500 | +typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime | ||
| 501 | + = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative; | ||
| 502 | + | ||
| 503 | +typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative = Void Function( | ||
| 504 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>); | ||
| 505 | + | ||
| 506 | +typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment = void Function( | ||
| 507 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>); | ||
| 508 | + | ||
| 509 | +typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative | ||
| 510 | + = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function( | ||
| 511 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, Int32); | ||
| 512 | + | ||
| 513 | +typedef SherpaOnnxOfflineSpeakerDiarizationProcess | ||
| 514 | + = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function( | ||
| 515 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, int); | ||
| 516 | + | ||
| 517 | +typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative = Int32 | ||
| 518 | + Function(Int32, Int32); | ||
| 519 | + | ||
| 520 | +typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative | ||
| 521 | + = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function( | ||
| 522 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>, | ||
| 523 | + Pointer<Float>, | ||
| 524 | + Int32, | ||
| 525 | + Pointer< | ||
| 526 | + NativeFunction< | ||
| 527 | + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>); | ||
| 528 | + | ||
| 529 | +typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg | ||
| 530 | + = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function( | ||
| 531 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>, | ||
| 532 | + Pointer<Float>, | ||
| 533 | + int, | ||
| 534 | + Pointer< | ||
| 535 | + NativeFunction< | ||
| 536 | + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>); | ||
| 537 | + | ||
| 538 | +typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative = Void Function( | ||
| 539 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>); | ||
| 540 | + | ||
| 541 | +typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult = void Function( | ||
| 542 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>); | ||
| 543 | + | ||
| 544 | +typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig = void Function( | ||
| 545 | + Pointer<SherpaOnnxOfflineSpeakerDiarization>, | ||
| 546 | + Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>); | ||
| 547 | + | ||
| 409 | typedef SherpaOnnxCreateOfflinePunctuation | 548 | typedef SherpaOnnxCreateOfflinePunctuation |
| 410 | = SherpaOnnxCreateOfflinePunctuationNative; | 549 | = SherpaOnnxCreateOfflinePunctuationNative; |
| 411 | 550 | ||
| @@ -940,6 +1079,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | @@ -940,6 +1079,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | ||
| 940 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); | 1079 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); |
| 941 | 1080 | ||
| 942 | class SherpaOnnxBindings { | 1081 | class SherpaOnnxBindings { |
| 1082 | + static SherpaOnnxCreateOfflineSpeakerDiarization? | ||
| 1083 | + sherpaOnnxCreateOfflineSpeakerDiarization; | ||
| 1084 | + static SherpaOnnxDestroyOfflineSpeakerDiarization? | ||
| 1085 | + sherpaOnnxDestroyOfflineSpeakerDiarization; | ||
| 1086 | + static SherpaOnnxOfflineSpeakerDiarizationGetSampleRate? | ||
| 1087 | + sherpaOnnxOfflineSpeakerDiarizationGetSampleRate; | ||
| 1088 | + static SherpaOnnxOfflineSpeakerDiarizationSetConfig? | ||
| 1089 | + sherpaOnnxOfflineSpeakerDiarizationSetConfig; | ||
| 1090 | + static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers? | ||
| 1091 | + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers; | ||
| 1092 | + static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments? | ||
| 1093 | + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments; | ||
| 1094 | + static SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime? | ||
| 1095 | + sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime; | ||
| 1096 | + static SherpaOnnxOfflineSpeakerDiarizationDestroySegment? | ||
| 1097 | + sherpaOnnxOfflineSpeakerDiarizationDestroySegment; | ||
| 1098 | + static SherpaOnnxOfflineSpeakerDiarizationProcess? | ||
| 1099 | + sherpaOnnxOfflineSpeakerDiarizationProcess; | ||
| 1100 | + static SherpaOnnxOfflineSpeakerDiarizationDestroyResult? | ||
| 1101 | + sherpaOnnxOfflineSpeakerDiarizationDestroyResult; | ||
| 1102 | + static SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg? | ||
| 1103 | + sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg; | ||
| 1104 | + | ||
| 943 | static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation; | 1105 | static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation; |
| 944 | static SherpaOnnxDestroyOfflinePunctuation? | 1106 | static SherpaOnnxDestroyOfflinePunctuation? |
| 945 | sherpaOnnxDestroyOfflinePunctuation; | 1107 | sherpaOnnxDestroyOfflinePunctuation; |
| @@ -1107,6 +1269,83 @@ class SherpaOnnxBindings { | @@ -1107,6 +1269,83 @@ class SherpaOnnxBindings { | ||
| 1107 | static SherpaOnnxFreeWave? freeWave; | 1269 | static SherpaOnnxFreeWave? freeWave; |
| 1108 | 1270 | ||
| 1109 | static void init(DynamicLibrary dynamicLibrary) { | 1271 | static void init(DynamicLibrary dynamicLibrary) { |
| 1272 | + sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary | ||
| 1273 | + .lookup< | ||
| 1274 | + NativeFunction< | ||
| 1275 | + SherpaOnnxCreateOfflineSpeakerDiarizationNative>>( | ||
| 1276 | + 'SherpaOnnxCreateOfflineSpeakerDiarization') | ||
| 1277 | + .asFunction(); | ||
| 1278 | + | ||
| 1279 | + sherpaOnnxDestroyOfflineSpeakerDiarization ??= dynamicLibrary | ||
| 1280 | + .lookup< | ||
| 1281 | + NativeFunction< | ||
| 1282 | + SherpaOnnxDestroyOfflineSpeakerDiarizationNative>>( | ||
| 1283 | + 'SherpaOnnxDestroyOfflineSpeakerDiarization') | ||
| 1284 | + .asFunction(); | ||
| 1285 | + | ||
| 1286 | + sherpaOnnxOfflineSpeakerDiarizationGetSampleRate ??= dynamicLibrary | ||
| 1287 | + .lookup< | ||
| 1288 | + NativeFunction< | ||
| 1289 | + SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative>>( | ||
| 1290 | + 'SherpaOnnxOfflineSpeakerDiarizationGetSampleRate') | ||
| 1291 | + .asFunction(); | ||
| 1292 | + | ||
| 1293 | + sherpaOnnxOfflineSpeakerDiarizationSetConfig ??= dynamicLibrary | ||
| 1294 | + .lookup< | ||
| 1295 | + NativeFunction< | ||
| 1296 | + SherpaOnnxOfflineSpeakerDiarizationSetConfigNative>>( | ||
| 1297 | + 'SherpaOnnxOfflineSpeakerDiarizationSetConfig') | ||
| 1298 | + .asFunction(); | ||
| 1299 | + | ||
| 1300 | + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers ??= dynamicLibrary | ||
| 1301 | + .lookup< | ||
| 1302 | + NativeFunction< | ||
| 1303 | + SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative>>( | ||
| 1304 | + 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers') | ||
| 1305 | + .asFunction(); | ||
| 1306 | + | ||
| 1307 | + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments ??= dynamicLibrary | ||
| 1308 | + .lookup< | ||
| 1309 | + NativeFunction< | ||
| 1310 | + SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative>>( | ||
| 1311 | + 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments') | ||
| 1312 | + .asFunction(); | ||
| 1313 | + | ||
| 1314 | + sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime ??= dynamicLibrary | ||
| 1315 | + .lookup< | ||
| 1316 | + NativeFunction< | ||
| 1317 | + SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative>>( | ||
| 1318 | + 'SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime') | ||
| 1319 | + .asFunction(); | ||
| 1320 | + | ||
| 1321 | + sherpaOnnxOfflineSpeakerDiarizationDestroySegment ??= dynamicLibrary | ||
| 1322 | + .lookup< | ||
| 1323 | + NativeFunction< | ||
| 1324 | + SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative>>( | ||
| 1325 | + 'SherpaOnnxOfflineSpeakerDiarizationDestroySegment') | ||
| 1326 | + .asFunction(); | ||
| 1327 | + | ||
| 1328 | + sherpaOnnxOfflineSpeakerDiarizationProcess ??= dynamicLibrary | ||
| 1329 | + .lookup< | ||
| 1330 | + NativeFunction< | ||
| 1331 | + SherpaOnnxOfflineSpeakerDiarizationProcessNative>>( | ||
| 1332 | + 'SherpaOnnxOfflineSpeakerDiarizationProcess') | ||
| 1333 | + .asFunction(); | ||
| 1334 | + | ||
| 1335 | + sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ??= dynamicLibrary | ||
| 1336 | + .lookup< | ||
| 1337 | + NativeFunction< | ||
| 1338 | + SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative>>( | ||
| 1339 | + 'SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg') | ||
| 1340 | + .asFunction(); | ||
| 1341 | + | ||
| 1342 | + sherpaOnnxOfflineSpeakerDiarizationDestroyResult ??= dynamicLibrary | ||
| 1343 | + .lookup< | ||
| 1344 | + NativeFunction< | ||
| 1345 | + SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative>>( | ||
| 1346 | + 'SherpaOnnxOfflineSpeakerDiarizationDestroyResult') | ||
| 1347 | + .asFunction(); | ||
| 1348 | + | ||
| 1110 | sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary | 1349 | sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary |
| 1111 | .lookup<NativeFunction<SherpaOnnxCreateOfflinePunctuationNative>>( | 1350 | .lookup<NativeFunction<SherpaOnnxCreateOfflinePunctuationNative>>( |
| 1112 | 'SherpaOnnxCreateOfflinePunctuation') | 1351 | 'SherpaOnnxCreateOfflinePunctuation') |
| 1 | name: sherpa_onnx | 1 | name: sherpa_onnx |
| 2 | 2 | ||
| 3 | description: > | 3 | description: > |
| 4 | - Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi | ||
| 5 | - with onnxruntime without Internet connection. | 4 | + Speech recognition, speech synthesis, speaker diarization, and speaker recognition |
| 5 | + using next-gen Kaldi with onnxruntime without Internet connection. | ||
| 6 | 6 | ||
| 7 | repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter | 7 | repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter |
| 8 | 8 | ||
| @@ -12,7 +12,7 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/ | @@ -12,7 +12,7 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/ | ||
| 12 | topics: | 12 | topics: |
| 13 | - speech-recognition | 13 | - speech-recognition |
| 14 | - speech-synthesis | 14 | - speech-synthesis |
| 15 | - - speaker-identification | 15 | + - speaker-diarization |
| 16 | - audio-tagging | 16 | - audio-tagging |
| 17 | - voice-activity-detection | 17 | - voice-activity-detection |
| 18 | 18 | ||
| @@ -41,7 +41,7 @@ dependencies: | @@ -41,7 +41,7 @@ dependencies: | ||
| 41 | sherpa_onnx_linux: ^1.10.27 | 41 | sherpa_onnx_linux: ^1.10.27 |
| 42 | # sherpa_onnx_linux: | 42 | # sherpa_onnx_linux: |
| 43 | # path: ../sherpa_onnx_linux | 43 | # path: ../sherpa_onnx_linux |
| 44 | - # | 44 | + |
| 45 | sherpa_onnx_windows: ^1.10.27 | 45 | sherpa_onnx_windows: ^1.10.27 |
| 46 | # sherpa_onnx_windows: | 46 | # sherpa_onnx_windows: |
| 47 | # path: ../sherpa_onnx_windows | 47 | # path: ../sherpa_onnx_windows |
| 1 | +name: speaker_diarization | ||
| 2 | +description: > | ||
| 3 | + This example demonstrates how to use the Dart API for speaker diarization. | ||
| 4 | + | ||
| 5 | +version: 1.0.0 | ||
| 6 | + | ||
| 7 | +environment: | ||
| 8 | + sdk: ">=3.0.0 <4.0.0" | ||
| 9 | + | ||
| 10 | +dependencies: | ||
| 11 | + sherpa_onnx: | ||
| 12 | + path: ../../flutter/sherpa_onnx | ||
| 13 | + path: ^1.9.0 | ||
| 14 | + | ||
| 15 | +dev_dependencies: | ||
| 16 | + lints: ^3.0.0 |
| @@ -1828,4 +1828,20 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback( | @@ -1828,4 +1828,20 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback( | ||
| 1828 | return ans; | 1828 | return ans; |
| 1829 | } | 1829 | } |
| 1830 | 1830 | ||
| 1831 | +const SherpaOnnxOfflineSpeakerDiarizationResult * | ||
| 1832 | +SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( | ||
| 1833 | + const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, | ||
| 1834 | + int32_t n, | ||
| 1835 | + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback) { | ||
| 1836 | + auto wrapper = [callback](int32_t num_processed_chunks, | ||
| 1837 | + int32_t num_total_chunks, void *) { | ||
| 1838 | + return callback(num_processed_chunks, num_total_chunks); | ||
| 1839 | + }; | ||
| 1840 | + | ||
| 1841 | + auto ans = new SherpaOnnxOfflineSpeakerDiarizationResult; | ||
| 1842 | + ans->impl = sd->impl->Process(samples, n, wrapper); | ||
| 1843 | + | ||
| 1844 | + return ans; | ||
| 1845 | +} | ||
| 1846 | + | ||
| 1831 | #endif | 1847 | #endif |
| @@ -1485,6 +1485,9 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment( | @@ -1485,6 +1485,9 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment( | ||
| 1485 | typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)( | 1485 | typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)( |
| 1486 | int32_t num_processed_chunk, int32_t num_total_chunks, void *arg); | 1486 | int32_t num_processed_chunk, int32_t num_total_chunks, void *arg); |
| 1487 | 1487 | ||
| 1488 | +typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)( | ||
| 1489 | + int32_t num_processed_chunk, int32_t num_total_chunks); | ||
| 1490 | + | ||
| 1488 | // The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult() | 1491 | // The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult() |
| 1489 | // to free the returned pointer to avoid memory leak. | 1492 | // to free the returned pointer to avoid memory leak. |
| 1490 | SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult * | 1493 | SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult * |
| @@ -1500,6 +1503,12 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback( | @@ -1500,6 +1503,12 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback( | ||
| 1500 | int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback, | 1503 | int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback, |
| 1501 | void *arg); | 1504 | void *arg); |
| 1502 | 1505 | ||
| 1506 | +SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult * | ||
| 1507 | +SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( | ||
| 1508 | + const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, | ||
| 1509 | + int32_t n, | ||
| 1510 | + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback); | ||
| 1511 | + | ||
| 1503 | SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( | 1512 | SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( |
| 1504 | const SherpaOnnxOfflineSpeakerDiarizationResult *r); | 1513 | const SherpaOnnxOfflineSpeakerDiarizationResult *r); |
| 1505 | 1514 |
| @@ -5,6 +5,7 @@ | @@ -5,6 +5,7 @@ | ||
| 5 | #define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_ | 5 | #define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_ |
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | +#include <memory> | ||
| 8 | #include <unordered_map> | 9 | #include <unordered_map> |
| 9 | #include <utility> | 10 | #include <utility> |
| 10 | #include <vector> | 11 | #include <vector> |
| @@ -204,7 +204,8 @@ Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback( | @@ -204,7 +204,8 @@ Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback( | ||
| 204 | jfloat *p = env->GetFloatArrayElements(samples, nullptr); | 204 | jfloat *p = env->GetFloatArrayElements(samples, nullptr); |
| 205 | jsize n = env->GetArrayLength(samples); | 205 | jsize n = env->GetArrayLength(samples); |
| 206 | auto segments = | 206 | auto segments = |
| 207 | - sd->Process(p, n, callback_wrapper, (void *)arg).SortByStartTime(); | 207 | + sd->Process(p, n, callback_wrapper, reinterpret_cast<void *>(arg)) |
| 208 | + .SortByStartTime(); | ||
| 208 | env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); | 209 | env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); |
| 209 | 210 | ||
| 210 | return ProcessImpl(env, segments); | 211 | return ProcessImpl(env, segments); |
-
请 注册 或 登录 后发表评论