Fangjun Kuang
Committed by GitHub

Dart API for speaker diarization (#1418)

... ... @@ -4,6 +4,11 @@ set -ex
cd dart-api-examples
pushd speaker-diarization
echo '----------speaker diarization----------'
./run.sh
popd
pushd speaker-identification
echo '----------3d speaker----------'
./run-3d-speaker.sh
... ...
... ... @@ -114,6 +114,7 @@ jobs:
cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml
cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
... ...
... ... @@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx
| Directory | Description |
|-----------|-------------|
| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.|
| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
| [./keyword-spotter](./keyword-spotter)| Example for keyword spotting.|
... ...
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/
... ...
# Introduction
This example shows how to use the Dart API from sherpa-onnx for speaker diarization.
# Usage
Please see [./run.sh](./run.sh)
... ...
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.
include: package:lints/recommended.yaml
# Uncomment the following section to specify additional rules.
# linter:
# rules:
# - camel_case_types
# analyzer:
# exclude:
# - path/to/excluded/files/**
# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints
# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options
... ...
../../vad/bin/init.dart
\ No newline at end of file
... ...
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:io';
import 'dart:typed_data';
import 'dart:ffi';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
/// Runs speaker diarization on `./0-four-speakers-zh.wav` and prints one line
/// per returned segment in the form `start -- end speaker_N`.
///
/// Returns silently when the native diarization object cannot be created, and
/// prints a diagnostic when the wave file's sample rate does not match the one
/// reported by the diarization object.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  /* Please use the following commands to download the files used in this file

  Step 1: Download a speaker segmentation model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

  Step 2: Download a speaker embedding extractor model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

  Step 3: Download test wave files

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available test wave files. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

  Step 4: Run it
  */

  final segmentationModel =
      "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";

  final embeddingModel =
      "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

  final waveFilename = "./0-four-speakers-zh.wav";

  final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
    pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
        model: segmentationModel),
  );

  final embeddingConfig =
      sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel);

  // Since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set
  // numClusters to 4. If you don't know the exact number, please set it to -1.
  // In that case, you have to set threshold. A larger threshold leads to
  // fewer clusters, i.e., fewer speakers.
  final clusteringConfig =
      sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);

  final config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: segmentationConfig,
      embedding: embeddingConfig,
      clustering: clusteringConfig,
      minDurationOn: 0.2,
      minDurationOff: 0.5);

  final sd = sherpa_onnx.OfflineSpeakerDiarization(config);
  if (sd.ptr == nullptr) {
    // Nothing was allocated on the native side, so there is nothing to free.
    return;
  }

  // The native object is not garbage collected; release it on every exit
  // path, including the early return below and any exception.
  try {
    final waveData = sherpa_onnx.readWave(waveFilename);
    if (sd.sampleRate != waveData.sampleRate) {
      print(
          'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}');
      return;
    }

    print('started');

    // Use the following statement if you don't want to use a callback
    // final segments = sd.process(samples: waveData.samples);

    final segments = sd.processWithCallback(
        samples: waveData.samples,
        callback: (int numProcessedChunk, int numTotalChunks) {
          final progress = 100.0 * numProcessedChunk / numTotalChunks;

          print('Progress ${progress.toStringAsFixed(2)}%');

          return 0;
        });

    for (int i = 0; i < segments.length; ++i) {
      print(
          '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)}  speaker_${segments[i].speaker}');
    }
  } finally {
    sd.free();
  }
}
... ...
name: speaker_diarization
description: >
This example demonstrates how to use the Dart API for speaker diarization.
version: 1.0.0
environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx: ^1.10.27
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0
dev_dependencies:
lints: ^3.0.0
... ...
#!/usr/bin/env bash
# Downloads the models and the test wave file needed by the speaker
# diarization example (skipping anything already present in the current
# directory) and then runs ./bin/speaker-diarization.dart.
#
# -e: stop at the first failing command.
# -x: print every command before it is executed.
set -ex
# Resolve the dependencies declared in pubspec.yaml.
dart pub get
# Speaker segmentation model: download the archive, unpack it, and delete the
# archive. Skipped when the unpacked model.onnx already exists.
if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi
# Speaker embedding extractor model: a single .onnx file, downloaded as is.
if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi
# Test recording used as input by the example.
if [ ! -f ./0-four-speakers-zh.wav ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi
# Run the example; it reads the three files above from the current directory.
dart run ./bin/speaker-diarization.dart
... ...
... ... @@ -11,6 +11,7 @@
| Functions | URL | Supported Platforms|
|---|---|---|
|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux|
|Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux|
|Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux|
|Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux|
... ...
... ... @@ -6,6 +6,7 @@ export 'src/audio_tagging.dart';
export 'src/feature_config.dart';
export 'src/keyword_spotter.dart';
export 'src/offline_recognizer.dart';
export 'src/offline_speaker_diarization.dart';
export 'src/offline_stream.dart';
export 'src/online_recognizer.dart';
export 'src/online_stream.dart';
... ...
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';
import './sherpa_onnx_bindings.dart';
import './speaker_identification.dart';
/// One entry of a diarization result: a [start]/[end] interval paired with a
/// [speaker] index.
class OfflineSpeakerDiarizationSegment {
  /// Creates a segment; all three values are required.
  const OfflineSpeakerDiarizationSegment({
    required this.start,
    required this.end,
    required this.speaker,
  });

  /// Beginning of the interval (presumably in seconds; confirm against the
  /// native API).
  final double start;

  /// End of the interval, in the same unit as [start].
  final double end;

  /// Integer label of the speaker assigned to this interval.
  final int speaker;

  @override
  String toString() => 'OfflineSpeakerDiarizationSegment('
      'start: $start, '
      'end: $end, '
      'speaker: $speaker)';
}
/// Settings for a pyannote speaker segmentation model.
class OfflineSpeakerSegmentationPyannoteModelConfig {
  /// Creates the config; [model] defaults to the empty string.
  const OfflineSpeakerSegmentationPyannoteModelConfig({this.model = ''});

  /// Path of the segmentation model file.
  final String model;

  @override
  String toString() =>
      'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)';
}
/// Settings for the speaker segmentation stage.
class OfflineSpeakerSegmentationModelConfig {
  /// Creates the config. Defaults: an empty pyannote config, one thread,
  /// debug enabled, and the `cpu` provider.
  const OfflineSpeakerSegmentationModelConfig({
    this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(),
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  /// Pyannote-specific model settings.
  final OfflineSpeakerSegmentationPyannoteModelConfig pyannote;

  /// Number of threads handed to the native library.
  final int numThreads;

  /// Whether the debug flag is passed to the native library.
  final bool debug;

  /// Name of the execution provider, e.g. `cpu`.
  final String provider;

  @override
  String toString() => 'OfflineSpeakerSegmentationModelConfig('
      'pyannote: $pyannote, '
      'numThreads: $numThreads, '
      'debug: $debug, '
      'provider: $provider)';
}
/// Settings for the clustering stage that groups segments by speaker.
class FastClusteringConfig {
  /// Creates the config; [numClusters] defaults to -1 and [threshold] to 0.5.
  const FastClusteringConfig({this.numClusters = -1, this.threshold = 0.5});

  /// Number of clusters to produce; -1 is the default.
  final int numClusters;

  /// Clustering threshold passed to the native library.
  final double threshold;

  @override
  String toString() => 'FastClusteringConfig('
      'numClusters: $numClusters, '
      'threshold: $threshold)';
}
/// Top-level configuration for [OfflineSpeakerDiarization]: the segmentation,
/// embedding and clustering settings plus two duration values.
class OfflineSpeakerDiarizationConfig {
  /// Creates the config. Every argument is optional and falls back to the
  /// default of the corresponding sub-config; the embedding model path
  /// defaults to the empty string.
  const OfflineSpeakerDiarizationConfig({
    this.segmentation = const OfflineSpeakerSegmentationModelConfig(),
    this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''),
    this.clustering = const FastClusteringConfig(),
    this.minDurationOn = 0.2,
    this.minDurationOff = 0.5,
  });

  /// Settings for the speaker segmentation model.
  final OfflineSpeakerSegmentationModelConfig segmentation;

  /// Settings for the speaker embedding extractor.
  final SpeakerEmbeddingExtractorConfig embedding;

  /// Settings for the clustering stage.
  final FastClusteringConfig clustering;

  /// Minimum "off" duration, in seconds.
  final double minDurationOff;

  /// Minimum "on" duration, in seconds.
  final double minDurationOn;

  @override
  String toString() => 'OfflineSpeakerDiarizationConfig('
      'segmentation: $segmentation, '
      'embedding: $embedding, '
      'clustering: $clustering, '
      'minDurationOn: $minDurationOn, '
      'minDurationOff: $minDurationOff)';
}
/// Dart wrapper around the native offline speaker diarization object.
///
/// The wrapper owns a native handle ([ptr]) that is not garbage collected:
/// call [free] when the instance is no longer needed.
class OfflineSpeakerDiarization {
  OfflineSpeakerDiarization._(
      {required this.ptr, required this.config, required this.sampleRate});

  /// Destroys the native object and resets [ptr] to `nullptr`.
  ///
  /// Does nothing when [ptr] is already `nullptr`, so it is safe to call more
  /// than once.
  void free() {
    if (ptr == nullptr) {
      return;
    }

    SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr);
    ptr = nullptr;
  }

  /// Creates the native diarization object from [config].
  ///
  /// When creation fails (or the bindings are not initialized) the returned
  /// instance has `ptr == nullptr` and `sampleRate == 0`.
  ///
  /// The caller must call [free] on the returned instance to avoid a memory
  /// leak.
  factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) {
    // calloc zero-initializes the struct, so string fields that have not been
    // assigned yet are null pointers and can be passed to free() safely.
    final c = calloc<SherpaOnnxOfflineSpeakerDiarizationConfig>();

    try {
      c.ref.segmentation.pyannote.model =
          config.segmentation.pyannote.model.toNativeUtf8();
      c.ref.segmentation.numThreads = config.segmentation.numThreads;
      c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0;
      c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8();

      c.ref.embedding.model = config.embedding.model.toNativeUtf8();
      c.ref.embedding.numThreads = config.embedding.numThreads;
      c.ref.embedding.debug = config.embedding.debug ? 1 : 0;
      c.ref.embedding.provider = config.embedding.provider.toNativeUtf8();

      c.ref.clustering.numClusters = config.clustering.numClusters;
      c.ref.clustering.threshold = config.clustering.threshold;

      c.ref.minDurationOn = config.minDurationOn;
      c.ref.minDurationOff = config.minDurationOff;

      final ptr = SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization
              ?.call(c) ??
          nullptr;

      var sampleRate = 0;
      if (ptr != nullptr) {
        sampleRate = SherpaOnnxBindings
                .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
                ?.call(ptr) ??
            0;
      }

      return OfflineSpeakerDiarization._(
          ptr: ptr, config: config, sampleRate: sampleRate);
    } finally {
      // Release the native strings and then the config struct itself; the
      // struct was previously leaked on every construction.
      calloc.free(c.ref.embedding.provider);
      calloc.free(c.ref.embedding.model);
      calloc.free(c.ref.segmentation.provider);
      calloc.free(c.ref.segmentation.pyannote.model);
      calloc.free(c);
    }
  }

  /// Runs diarization on [samples] and returns the resulting segments.
  ///
  /// Returns an empty list when [ptr] is `nullptr` or the native call yields
  /// no result.
  List<OfflineSpeakerDiarizationSegment> process(
      {required Float32List samples}) {
    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;
    final p = _copyToNative(samples);

    try {
      final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess
              ?.call(ptr, p, n) ??
          nullptr;

      return _consumeResult(r);
    } finally {
      // The native copy of the samples is only needed during the call.
      calloc.free(p);
    }
  }

  /// Same as [process], but invokes [callback] with the number of processed
  /// chunks and the total number of chunks while the native code is running.
  ///
  /// The value returned by [callback] is forwarded to the native library; if
  /// [callback] throws, 0 is forwarded instead.
  List<OfflineSpeakerDiarizationSegment> processWithCallback({
    required Float32List samples,
    required int Function(int numProcessedChunks, int numTotalChunks) callback,
  }) {
    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;
    final p = _copyToNative(samples);

    final wrapper = NativeCallable<
            SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal(
        (int numProcessedChunks, int numTotalChunks) {
      return callback(numProcessedChunks, numTotalChunks);
    }, exceptionalReturn: 0);

    try {
      final r = SherpaOnnxBindings
              .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
              ?.call(ptr, p, n, wrapper.nativeFunction) ??
          nullptr;

      return _consumeResult(r);
    } finally {
      // Close the callable and release the sample copy even if something
      // above throws.
      wrapper.close();
      calloc.free(p);
    }
  }

  /// Copies [samples] into newly allocated native memory.
  ///
  /// The caller owns the returned pointer and must release it with
  /// `calloc.free`.
  static Pointer<Float> _copyToNative(Float32List samples) {
    final n = samples.length;
    final p = calloc<Float>(n);
    p.asTypedList(n).setAll(0, samples);
    return p;
  }

  /// Converts the native result [r] to Dart segments and then destroys [r].
  List<OfflineSpeakerDiarizationSegment> _consumeResult(
      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) {
    try {
      return _processImpl(r);
    } finally {
      SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
          ?.call(r);
    }
  }

  /// Reads the segments of [r], sorted by start time, into a Dart list.
  ///
  /// Does not destroy [r]; the native segment array obtained here is
  /// destroyed before returning.
  List<OfflineSpeakerDiarizationSegment> _processImpl(
      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) {
    if (r == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final numSegments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
            ?.call(r) ??
        0;
    final segments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
            ?.call(r) ??
        nullptr;

    if (segments == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final ans = <OfflineSpeakerDiarizationSegment>[];
    for (int i = 0; i != numSegments; ++i) {
      final s = segments + i;

      final tmp = OfflineSpeakerDiarizationSegment(
          start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker);
      ans.add(tmp);
    }

    SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment
        ?.call(segments);

    return ans;
  }

  /// Handle of the native object; `nullptr` after [free] or when creation
  /// failed.
  Pointer<SherpaOnnxOfflineSpeakerDiarization> ptr;

  /// The configuration this instance was created with.
  OfflineSpeakerDiarizationConfig config;

  /// Sample rate reported by the native object; 0 when creation failed.
  final int sampleRate;
}
... ...
... ... @@ -2,6 +2,66 @@
import 'dart:ffi';
import 'package:ffi/ffi.dart';
/// FFI struct holding the speaker embedding extractor configuration.
///
/// Presumably mirrors the C struct of the same name in the sherpa-onnx C API;
/// confirm against the C header before changing the order or the native type
/// of any field, because both define the memory layout.
final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
/// Native UTF-8 string with the model path.
external Pointer<Utf8> model;
/// Thread count, as a 32-bit integer.
@Int32()
external int numThreads;
/// Debug flag, as a 32-bit integer.
@Int32()
external int debug;
/// Native UTF-8 string with the provider name.
external Pointer<Utf8> provider;
}
/// FFI struct for one diarization segment: two 32-bit floats followed by a
/// 32-bit integer.
///
/// Presumably mirrors the C struct of the same name; confirm against the C
/// header before changing field order or types.
final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct {
/// Start of the segment.
@Float()
external double start;
/// End of the segment.
@Float()
external double end;
/// Speaker index assigned to the segment.
@Int32()
external int speaker;
}
/// FFI struct with the pyannote segmentation model settings; it contains only
/// the model path.
final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
extends Struct {
/// Native UTF-8 string with the model path.
external Pointer<Utf8> model;
}
/// FFI struct with the speaker segmentation settings.
///
/// The pyannote config is embedded by value (not through a pointer), so its
/// fields are part of this struct's layout.
final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct {
/// Nested pyannote model settings.
external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote;
/// Thread count, as a 32-bit integer.
@Int32()
external int numThreads;
/// Debug flag, as a 32-bit integer.
@Int32()
external int debug;
/// Native UTF-8 string with the provider name.
external Pointer<Utf8> provider;
}
/// FFI struct with the clustering settings: a 32-bit cluster count followed by
/// a 32-bit float threshold.
final class SherpaOnnxFastClusteringConfig extends Struct {
/// Number of clusters.
@Int32()
external int numClusters;
/// Clustering threshold.
@Float()
external double threshold;
}
/// FFI struct with the complete speaker diarization configuration.
///
/// The three sub-configs are embedded by value, followed by two 32-bit floats.
/// Presumably mirrors the C struct of the same name; confirm against the C
/// header before changing field order or types.
final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct {
/// Segmentation model settings.
external SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation;
/// Embedding extractor settings.
external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding;
/// Clustering settings.
external SherpaOnnxFastClusteringConfig clustering;
/// Minimum "on" duration.
@Float()
external double minDurationOn;
/// Minimum "off" duration.
@Float()
external double minDurationOff;
}
final class SherpaOnnxOfflinePunctuationModelConfig extends Struct {
external Pointer<Utf8> ctTransformer;
... ... @@ -341,18 +401,6 @@ final class SherpaOnnxWave extends Struct {
external int numSamples;
}
/// FFI struct holding the speaker embedding extractor configuration.
///
/// Field order and native types define the memory layout; presumably they
/// mirror the C struct of the same name (confirm against the C header).
final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
/// Native UTF-8 string with the model path.
external Pointer<Utf8> model;
/// Thread count, as a 32-bit integer.
@Int32()
external int numThreads;
/// Debug flag, as a 32-bit integer.
@Int32()
external int debug;
/// Native UTF-8 string with the provider name.
external Pointer<Utf8> provider;
}
final class SherpaOnnxKeywordSpotterConfig extends Struct {
external SherpaOnnxFeatureConfig feat;
... ... @@ -402,10 +450,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
/// Opaque native handle type; only ever used behind a [Pointer].
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
/// Opaque handle type of the native speaker diarization object.
final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {}
/// Opaque handle type of a native speaker diarization result.
final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {}
// Each native function is described by a pair of typedefs: `...Native` uses
// dart:ffi native types (Int32, Void, ...) and is used for the symbol lookup;
// the one without the suffix uses the corresponding Dart types and is the
// type of the callable stored in SherpaOnnxBindings. When a signature only
// involves Pointer types, the Dart typedef is an alias of the native one.

// Creates a diarization object from a config struct.
typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative
= Pointer<SherpaOnnxOfflineSpeakerDiarization> Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
typedef SherpaOnnxCreateOfflineSpeakerDiarization
= SherpaOnnxCreateOfflineSpeakerDiarizationNative;
// Destroys a diarization object.
typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative = Void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>);
typedef SherpaOnnxDestroyOfflineSpeakerDiarization = void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>);
// Creates an offline punctuation object (unrelated to speaker diarization).
typedef SherpaOnnxCreateOfflinePunctuationNative
= Pointer<SherpaOnnxOfflinePunctuation> Function(
Pointer<SherpaOnnxOfflinePunctuationConfig>);
// Returns the sample rate of a diarization object.
typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative = Int32 Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>);
typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate = int Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>);
// Native signature for passing a config struct to an existing diarization
// object. The matching Dart typedef is declared further below.
typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative = Void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>,
Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
// Returns the number of speakers in a result.
typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative = Int32
Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers = int Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
// Returns the number of segments in a result.
typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative = Int32
Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments = int Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
// Returns a pointer to the segments of a result, sorted by start time.
typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative
= Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment> Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
= SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative;
// Destroys a segment array returned by the sort-by-start-time function.
typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative = Void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);
typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment = void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);
// Runs diarization on a buffer of float samples; the int is the sample count.
typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative
= Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, Int32);
typedef SherpaOnnxOfflineSpeakerDiarizationProcess
= Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, int);
// Progress callback taking two 32-bit integers (processed chunks, total
// chunks) and returning a 32-bit integer.
typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative = Int32
Function(Int32, Int32);
// Same as the process function above, with a progress callback as the last
// argument.
typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative
= Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>,
Pointer<Float>,
Int32,
Pointer<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);
typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
= Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>,
Pointer<Float>,
int,
Pointer<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);
// Destroys a result returned by one of the process functions.
typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative = Void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult = void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);
// Dart-side counterpart of SherpaOnnxOfflineSpeakerDiarizationSetConfigNative.
typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig = void Function(
Pointer<SherpaOnnxOfflineSpeakerDiarization>,
Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
// Dart-side alias for the offline punctuation constructor declared above.
typedef SherpaOnnxCreateOfflinePunctuation
= SherpaOnnxCreateOfflinePunctuationNative;
... ... @@ -940,6 +1079,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
class SherpaOnnxBindings {
static SherpaOnnxCreateOfflineSpeakerDiarization?
sherpaOnnxCreateOfflineSpeakerDiarization;
static SherpaOnnxDestroyOfflineSpeakerDiarization?
sherpaOnnxDestroyOfflineSpeakerDiarization;
static SherpaOnnxOfflineSpeakerDiarizationGetSampleRate?
sherpaOnnxOfflineSpeakerDiarizationGetSampleRate;
static SherpaOnnxOfflineSpeakerDiarizationSetConfig?
sherpaOnnxOfflineSpeakerDiarizationSetConfig;
static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers?
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers;
static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments?
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments;
static SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime?
sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime;
static SherpaOnnxOfflineSpeakerDiarizationDestroySegment?
sherpaOnnxOfflineSpeakerDiarizationDestroySegment;
static SherpaOnnxOfflineSpeakerDiarizationProcess?
sherpaOnnxOfflineSpeakerDiarizationProcess;
static SherpaOnnxOfflineSpeakerDiarizationDestroyResult?
sherpaOnnxOfflineSpeakerDiarizationDestroyResult;
static SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg?
sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg;
static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation;
static SherpaOnnxDestroyOfflinePunctuation?
sherpaOnnxDestroyOfflinePunctuation;
... ... @@ -1107,6 +1269,83 @@ class SherpaOnnxBindings {
static SherpaOnnxFreeWave? freeWave;
static void init(DynamicLibrary dynamicLibrary) {
sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxCreateOfflineSpeakerDiarizationNative>>(
'SherpaOnnxCreateOfflineSpeakerDiarization')
.asFunction();
sherpaOnnxDestroyOfflineSpeakerDiarization ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxDestroyOfflineSpeakerDiarizationNative>>(
'SherpaOnnxDestroyOfflineSpeakerDiarization')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationGetSampleRate ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative>>(
'SherpaOnnxOfflineSpeakerDiarizationGetSampleRate')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationSetConfig ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationSetConfigNative>>(
'SherpaOnnxOfflineSpeakerDiarizationSetConfig')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative>>(
'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative>>(
'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative>>(
'SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationDestroySegment ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative>>(
'SherpaOnnxOfflineSpeakerDiarizationDestroySegment')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationProcess ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationProcessNative>>(
'SherpaOnnxOfflineSpeakerDiarizationProcess')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative>>(
'SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg')
.asFunction();
sherpaOnnxOfflineSpeakerDiarizationDestroyResult ??= dynamicLibrary
.lookup<
NativeFunction<
SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative>>(
'SherpaOnnxOfflineSpeakerDiarizationDestroyResult')
.asFunction();
sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCreateOfflinePunctuationNative>>(
'SherpaOnnxCreateOfflinePunctuation')
... ...
name: sherpa_onnx
description: >
Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
with onnxruntime without Internet connection.
Speech recognition, speech synthesis, speaker diarization, and speaker recognition
using next-gen Kaldi with onnxruntime without Internet connection.
repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter
... ... @@ -12,7 +12,7 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/
topics:
- speech-recognition
- speech-synthesis
- speaker-identification
- speaker-diarization
- audio-tagging
- voice-activity-detection
... ... @@ -41,7 +41,7 @@ dependencies:
sherpa_onnx_linux: ^1.10.27
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows: ^1.10.27
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows
... ...
name: speaker_diarization
description: >
This example demonstrates how to use the Dart API for speaker diarization.
version: 1.0.0
environment:
sdk: ">=3.0.0 <4.0.0"
dependencies:
sherpa_onnx:
path: ../../flutter/sherpa_onnx
path: ^1.9.0
dev_dependencies:
lints: ^3.0.0
... ...
... ... @@ -1828,4 +1828,20 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
return ans;
}
// Runs diarization on `n` samples and reports progress through `callback`,
// which takes no user-data argument.
//
// The result is allocated with `new` and returned to the caller.
const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n,
    SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback) {
  // Process() invokes its callback with three arguments; this adapter drops
  // the trailing void * and forwards the two chunk counters.
  auto adapter = [callback](int32_t processed, int32_t total,
                            void * /*unused*/) {
    return callback(processed, total);
  };

  auto result = new SherpaOnnxOfflineSpeakerDiarizationResult;
  result->impl = sd->impl->Process(samples, n, adapter);

  return result;
}
#endif
... ...
... ... @@ -1485,6 +1485,9 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)(
int32_t num_processed_chunk, int32_t num_total_chunks, void *arg);
// Progress callback that receives only the number of processed chunks and the
// total number of chunks; unlike
// SherpaOnnxOfflineSpeakerDiarizationProgressCallback it has no trailing
// `void *arg` parameter.
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)(
int32_t num_processed_chunk, int32_t num_total_chunks);
// The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult()
// to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
... ... @@ -1500,6 +1503,12 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback,
void *arg);
// Same as SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(), except
// that the callback takes no `void *arg`.
//
// The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult()
// to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
int32_t n,
SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback);
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
const SherpaOnnxOfflineSpeakerDiarizationResult *r);
... ...
... ... @@ -5,6 +5,7 @@
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_
#include <algorithm>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
... ...
... ... @@ -204,7 +204,8 @@ Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback(
jfloat *p = env->GetFloatArrayElements(samples, nullptr);
jsize n = env->GetArrayLength(samples);
auto segments =
sd->Process(p, n, callback_wrapper, (void *)arg).SortByStartTime();
sd->Process(p, n, callback_wrapper, reinterpret_cast<void *>(arg))
.SortByStartTime();
env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
return ProcessImpl(env, segments);
... ...