Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-10-11 21:17:41 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-10-11 21:17:41 +0800
Commit
1ed803adc13a3b060a6b972253e3adfa81be8126
1ed803ad
1 parent
1851ff63
Dart API for speaker diarization (#1418)
隐藏空白字符变更
内嵌
并排对比
正在显示
21 个修改的文件
包含
733 行增加
和
17 行删除
.github/scripts/test-dart.sh
.github/workflows/test-dart.yaml
dart-api-examples/README.md
dart-api-examples/speaker-diarization/.gitignore
dart-api-examples/speaker-diarization/CHANGELOG.md
dart-api-examples/speaker-diarization/README.md
dart-api-examples/speaker-diarization/analysis_options.yaml
dart-api-examples/speaker-diarization/bin/init.dart
dart-api-examples/speaker-diarization/bin/speaker-diarization.dart
dart-api-examples/speaker-diarization/pubspec.yaml
dart-api-examples/speaker-diarization/run.sh
flutter/sherpa_onnx/example/example.md
flutter/sherpa_onnx/lib/sherpa_onnx.dart
flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
flutter/sherpa_onnx/pubspec.yaml
scripts/dart/speaker-diarization-pubspec.yaml
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h
sherpa-onnx/jni/offline-speaker-diarization.cc
.github/scripts/test-dart.sh
查看文件 @
1ed803a
...
...
@@ -4,6 +4,11 @@ set -ex
cd
dart-api-examples
pushd
speaker-diarization
echo
'----------speaker diarization----------'
./run.sh
popd
pushd
speaker-identification
echo
'----------3d speaker----------'
./run-3d-speaker.sh
...
...
.github/workflows/test-dart.yaml
查看文件 @
1ed803a
...
...
@@ -114,6 +114,7 @@ jobs:
cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml
cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
...
...
dart-api-examples/README.md
查看文件 @
1ed803a
...
...
@@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx
| Directory | Description |
|-----------|-------------|
|
[
./speaker-diarization
](
./speaker-diarization
)
| Example for speaker diarization.|
|
[
./add-punctuations
](
./add-punctuations
)
| Example for adding punctuations to text.|
|
[
./audio-tagging
](
./audio-tagging
)
| Example for audio tagging.|
|
[
./keyword-spotter
](
./keyword-spotter
)
| Example for keyword spotting|
...
...
dart-api-examples/speaker-diarization/.gitignore
0 → 100644
查看文件 @
1ed803a
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/
...
...
dart-api-examples/speaker-diarization/CHANGELOG.md
0 → 100644
查看文件 @
1ed803a
## 1.0.0
-
Initial version.
...
...
dart-api-examples/speaker-diarization/README.md
0 → 100644
查看文件 @
1ed803a
# Introduction
This example shows how to use the Dart API from sherpa-onnx for speaker diarization.
# Usage
Please see
[
./run.sh
](
./run.sh
)
...
...
dart-api-examples/speaker-diarization/analysis_options.yaml
0 → 100644
查看文件 @
1ed803a
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.
include
:
package:lints/recommended.yaml
# Uncomment the following section to specify additional rules.
# linter:
# rules:
# - camel_case_types
# analyzer:
# exclude:
# - path/to/excluded/files/**
# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints
# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options
...
...
dart-api-examples/speaker-diarization/bin/init.dart
0 → 120000
查看文件 @
1ed803a
../../
vad
/
bin
/
init
.
dart
\ No newline at end of file
...
...
dart-api-examples/speaker-diarization/bin/speaker-diarization.dart
0 → 100644
查看文件 @
1ed803a
// Copyright (c) 2024 Xiaomi Corporation
import
'dart:io'
;
import
'dart:typed_data'
;
import
'dart:ffi'
;
import
'package:sherpa_onnx/sherpa_onnx.dart'
as
sherpa_onnx
;
import
'./init.dart'
;
/// Runs offline speaker diarization on `./0-four-speakers-zh.wav` and prints
/// one line per detected segment (start, end and speaker index).
///
/// The native diarization object is released with `free()` on every exit
/// path once it has been created successfully.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  /* Please use the following commands to download files used in this file

  Step 1: Download a speaker segmentation model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
    rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

  Step 2: Download a speaker embedding extractor model

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  for a list of available models. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

  Step 3. Download test wave files

  Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  for a list of available test wave files. The following is an example

    wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

  Step 4. Run it
   */

  final segmentationModel =
      "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx";

  final embeddingModel =
      "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx";

  final waveFilename = "./0-four-speakers-zh.wav";

  final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig(
    pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig(
        model: segmentationModel),
  );

  final embeddingConfig =
      sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel);

  // Since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set
  // numClusters to 4. If you don't know the exact number, please set it to -1.
  // In that case, you have to set threshold. A larger threshold leads to
  // fewer clusters, i.e., fewer speakers.
  final clusteringConfig =
      sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5);

  var config = sherpa_onnx.OfflineSpeakerDiarizationConfig(
      segmentation: segmentationConfig,
      embedding: embeddingConfig,
      clustering: clusteringConfig,
      minDurationOn: 0.2,
      minDurationOff: 0.5);

  final sd = sherpa_onnx.OfflineSpeakerDiarization(config);
  if (sd.ptr == nullptr) {
    // Creation failed; there is no native object to release.
    return;
  }

  try {
    final waveData = sherpa_onnx.readWave(waveFilename);
    if (sd.sampleRate != waveData.sampleRate) {
      print(
          'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}');
      return;
    }

    print('started');

    // Use the following statement if you don't want to use a callback
    // final segments = sd.process(samples: waveData.samples);

    final segments = sd.processWithCallback(
        samples: waveData.samples,
        callback: (int numProcessedChunk, int numTotalChunks) {
          final progress = 100.0 * numProcessedChunk / numTotalChunks;

          print('Progress ${progress.toStringAsFixed(2)}%');

          return 0;
        });

    for (int i = 0; i < segments.length; ++i) {
      print(
          '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)} speaker_${segments[i].speaker}');
    }
  } finally {
    // The caller owns the native object and must release it explicitly.
    sd.free();
  }
}
...
...
dart-api-examples/speaker-diarization/pubspec.yaml
0 → 100644
查看文件 @
1ed803a
name
:
speaker_diarization
description
:
>
This example demonstrates how to use the Dart API for speaker diarization.
version
:
1.0.0
environment
:
sdk
:
"
>=3.0.0
<4.0.0"
dependencies
:
sherpa_onnx
:
^1.10.27
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path
:
^1.9.0
dev_dependencies
:
lints
:
^3.0.0
...
...
dart-api-examples/speaker-diarization/run.sh
0 → 100755
查看文件 @
1ed803a
#!/usr/bin/env bash

# Fetches the Dart dependencies, downloads the segmentation model, the
# speaker embedding model and the test wave file if they are not already
# present in the current directory, and then runs the example.

# -e: stop at the first failing command; -x: echo each command.
set -ex

dart pub get

# Speaker segmentation model (downloaded as an archive, then extracted).
if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
fi

# Speaker embedding extractor model.
if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

# Test wave file.
if [ ! -f ./0-four-speakers-zh.wav ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
fi

dart run ./bin/speaker-diarization.dart
...
...
flutter/sherpa_onnx/example/example.md
查看文件 @
1ed803a
...
...
@@ -11,6 +11,7 @@
| Functions | URL | Supported Platforms|
|---|---|---|
|Speaker diarization|
[
Address
](
https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization
)
| macOS, Windows, Linux|
|Streaming speech recognition|
[
Address
](
https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr
)
| macOS, Windows, Linux|
|Non-Streaming speech recognition|
[
Address
](
https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr
)
| macOS, Windows, Linux|
|Text to speech|
[
Address
](
https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts
)
| macOS, Windows, Linux|
...
...
flutter/sherpa_onnx/lib/sherpa_onnx.dart
查看文件 @
1ed803a
...
...
@@ -6,6 +6,7 @@ export 'src/audio_tagging.dart';
export
'src/feature_config.dart'
;
export
'src/keyword_spotter.dart'
;
export
'src/offline_recognizer.dart'
;
export
'src/offline_speaker_diarization.dart'
;
export
'src/offline_stream.dart'
;
export
'src/online_recognizer.dart'
;
export
'src/online_stream.dart'
;
...
...
flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart
0 → 100644
查看文件 @
1ed803a
// Copyright (c) 2024 Xiaomi Corporation
import
'dart:ffi'
;
import
'dart:typed_data'
;
import
'package:ffi/ffi.dart'
;
import
'./sherpa_onnx_bindings.dart'
;
import
'./speaker_identification.dart'
;
/// One diarization result entry: a time span together with the index of the
/// speaker it was assigned to.
class OfflineSpeakerDiarizationSegment {
  final double start;
  final double end;
  final int speaker;

  const OfflineSpeakerDiarizationSegment({
    required this.start,
    required this.end,
    required this.speaker,
  });

  @override
  String toString() =>
      'OfflineSpeakerDiarizationSegment(start: $start, end: $end, speaker: $speaker)';
}
/// Configuration of a pyannote speaker segmentation model.
class OfflineSpeakerSegmentationPyannoteModelConfig {
  /// Path to the model file; defaults to the empty string.
  final String model;

  const OfflineSpeakerSegmentationPyannoteModelConfig({this.model = ''});

  @override
  String toString() =>
      'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)';
}
/// Configuration of the speaker segmentation stage.
///
/// Defaults: an empty pyannote config, one thread, debug output enabled and
/// the 'cpu' provider.
class OfflineSpeakerSegmentationModelConfig {
  final OfflineSpeakerSegmentationPyannoteModelConfig pyannote;
  final int numThreads;
  final bool debug;
  final String provider;

  const OfflineSpeakerSegmentationModelConfig({
    this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(),
    this.numThreads = 1,
    this.debug = true,
    this.provider = 'cpu',
  });

  @override
  String toString() =>
      'OfflineSpeakerSegmentationModelConfig(pyannote: $pyannote, numThreads: $numThreads, debug: $debug, provider: $provider)';
}
/// Configuration of the clustering stage.
///
/// [numClusters] defaults to -1 and [threshold] to 0.5.
class FastClusteringConfig {
  final int numClusters;
  final double threshold;

  const FastClusteringConfig({
    this.numClusters = -1,
    this.threshold = 0.5,
  });

  @override
  String toString() =>
      'FastClusteringConfig(numClusters: $numClusters, threshold: $threshold)';
}
/// Top-level configuration for [OfflineSpeakerDiarization]: the segmentation
/// model, the speaker embedding extractor, the clustering settings and the
/// two duration settings.
class OfflineSpeakerDiarizationConfig {
  final OfflineSpeakerSegmentationModelConfig segmentation;
  final SpeakerEmbeddingExtractorConfig embedding;
  final FastClusteringConfig clustering;

  /// In seconds.
  final double minDurationOff;

  /// In seconds.
  final double minDurationOn;

  const OfflineSpeakerDiarizationConfig({
    this.segmentation = const OfflineSpeakerSegmentationModelConfig(),
    this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''),
    this.clustering = const FastClusteringConfig(),
    this.minDurationOn = 0.2,
    this.minDurationOff = 0.5,
  });

  @override
  String toString() =>
      'OfflineSpeakerDiarizationConfig(segmentation: $segmentation, embedding: $embedding, clustering: $clustering, minDurationOn: $minDurationOn, minDurationOff: $minDurationOff)';
}
/// Dart wrapper around the native offline speaker diarization object.
///
/// Instances hold a native pointer in [ptr]; call [free] when done with the
/// instance to avoid a memory leak.
class OfflineSpeakerDiarization {
  OfflineSpeakerDiarization._(
      {required this.ptr, required this.config, required this.sampleRate});

  /// Destroys the native object and resets [ptr] to `nullptr`.
  void free() {
    SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr);
    ptr = nullptr;
  }

  /// The user is responsible to call the OfflineSpeakerDiarization.free()
  /// method of the returned instance to avoid memory leak.
  ///
  /// If the native object cannot be created, the returned instance has
  /// [ptr] equal to `nullptr` and [sampleRate] equal to 0.
  factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) {
    final c = calloc<SherpaOnnxOfflineSpeakerDiarizationConfig>();

    c.ref.segmentation.pyannote.model =
        config.segmentation.pyannote.model.toNativeUtf8();
    c.ref.segmentation.numThreads = config.segmentation.numThreads;
    c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0;
    c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8();

    c.ref.embedding.model = config.embedding.model.toNativeUtf8();
    c.ref.embedding.numThreads = config.embedding.numThreads;
    c.ref.embedding.debug = config.embedding.debug ? 1 : 0;
    c.ref.embedding.provider = config.embedding.provider.toNativeUtf8();

    c.ref.clustering.numClusters = config.clustering.numClusters;
    c.ref.clustering.threshold = config.clustering.threshold;

    c.ref.minDurationOn = config.minDurationOn;
    c.ref.minDurationOff = config.minDurationOff;

    final ptr =
        SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ??
            nullptr;

    // The native strings and the config struct are only needed for the
    // duration of the create call; release all of them, including the
    // struct itself.
    calloc.free(c.ref.embedding.provider);
    calloc.free(c.ref.embedding.model);
    calloc.free(c.ref.segmentation.provider);
    calloc.free(c.ref.segmentation.pyannote.model);
    calloc.free(c);

    int sampleRate = 0;
    if (ptr != nullptr) {
      sampleRate = SherpaOnnxBindings
              .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
              ?.call(ptr) ??
          0;
    }

    return OfflineSpeakerDiarization._(
        ptr: ptr, config: config, sampleRate: sampleRate);
  }

  /// Runs diarization on [samples] and returns the segments in the order
  /// produced by the native sort-by-start-time call.
  ///
  /// Returns an empty list if this object has no native handle or if the
  /// native call yields no result.
  List<OfflineSpeakerDiarizationSegment> process(
      {required Float32List samples}) {
    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);

    try {
      // Copy the Dart samples into the native buffer.
      p.asTypedList(n).setAll(0, samples);

      final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess
              ?.call(ptr, p, n) ??
          nullptr;

      final ans = _processImpl(r);

      SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
          ?.call(r);

      return ans;
    } finally {
      // The sample buffer is owned by this method; always release it.
      calloc.free(p);
    }
  }

  /// Same as [process], but invokes [callback] with the number of processed
  /// chunks and the total number of chunks while the native call runs.
  ///
  /// If [callback] throws, 0 is reported to the native side
  /// (`exceptionalReturn`).
  List<OfflineSpeakerDiarizationSegment> processWithCallback({
    required Float32List samples,
    required int Function(int numProcessedChunks, int numTotalChunks) callback,
  }) {
    if (ptr == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final n = samples.length;
    final Pointer<Float> p = calloc<Float>(n);

    try {
      p.asTypedList(n).setAll(0, samples);

      final wrapper = NativeCallable<
              SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal(
          (int numProcessedChunks, int numTotalChunks) {
        return callback(numProcessedChunks, numTotalChunks);
      }, exceptionalReturn: 0);

      try {
        final r = SherpaOnnxBindings
                .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
                ?.call(ptr, p, n, wrapper.nativeFunction) ??
            nullptr;

        final ans = _processImpl(r);

        SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult
            ?.call(r);

        return ans;
      } finally {
        // Close the native callable even if the native call throws.
        wrapper.close();
      }
    } finally {
      calloc.free(p);
    }
  }

  /// Converts the native result [r] into a list of Dart segments.
  ///
  /// Does not destroy [r]; only the temporary native segment array obtained
  /// here is destroyed.
  List<OfflineSpeakerDiarizationSegment> _processImpl(
      Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> r) {
    if (r == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final numSegments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
            ?.call(r) ??
        0;

    final segments = SherpaOnnxBindings
            .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
            ?.call(r) ??
        nullptr;

    if (segments == nullptr) {
      return <OfflineSpeakerDiarizationSegment>[];
    }

    final ans = <OfflineSpeakerDiarizationSegment>[];
    // `<` rather than `!=` so that an unexpected negative count cannot turn
    // this into an unbounded walk over native memory.
    for (int i = 0; i < numSegments; ++i) {
      final s = segments + i;

      final tmp = OfflineSpeakerDiarizationSegment(
          start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker);
      ans.add(tmp);
    }

    SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment
        ?.call(segments);

    return ans;
  }

  /// Native handle; `nullptr` after [free] or when creation failed.
  Pointer<SherpaOnnxOfflineSpeakerDiarization> ptr;

  /// The configuration this instance was created with.
  OfflineSpeakerDiarizationConfig config;

  /// Sample rate reported by the native object; 0 when creation failed.
  final int sampleRate;
}
...
...
flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
查看文件 @
1ed803a
...
...
@@ -2,6 +2,66 @@
import
'dart:ffi'
;
import
'package:ffi/ffi.dart'
;
/// FFI struct for the speaker embedding extractor configuration.
///
/// Field order and native types define the memory layout; do not reorder.
final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct {
  external Pointer<Utf8> model;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}
/// FFI struct for one native diarization segment: two floats followed by a
/// 32-bit speaker index.
///
/// Field order and native types define the memory layout; do not reorder.
final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct {
  @Float()
  external double start;

  @Float()
  external double end;

  @Int32()
  external int speaker;
}
/// FFI struct for the pyannote segmentation model configuration; holds a
/// single native UTF-8 string.
final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
    extends Struct {
  external Pointer<Utf8> model;
}
/// FFI struct for the speaker segmentation configuration.
///
/// The pyannote config is embedded by value, not by pointer.
/// Field order and native types define the memory layout; do not reorder.
final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct {
  external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote;

  @Int32()
  external int numThreads;

  @Int32()
  external int debug;

  external Pointer<Utf8> provider;
}
/// FFI struct for the clustering configuration.
///
/// Field order and native types define the memory layout; do not reorder.
final class SherpaOnnxFastClusteringConfig extends Struct {
  @Int32()
  external int numClusters;

  @Float()
  external double threshold;
}
/// FFI struct for the top-level speaker diarization configuration.
///
/// The three nested configs are embedded by value.
/// Field order and native types define the memory layout; do not reorder.
final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct {
  external SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation;
  external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding;
  external SherpaOnnxFastClusteringConfig clustering;

  @Float()
  external double minDurationOn;

  @Float()
  external double minDurationOff;
}
final
class
SherpaOnnxOfflinePunctuationModelConfig
extends
Struct
{
external
Pointer
<
Utf8
>
ctTransformer
;
...
...
@@ -341,18 +401,6 @@ final class SherpaOnnxWave extends Struct {
external
int
numSamples
;
}
final
class
SherpaOnnxSpeakerEmbeddingExtractorConfig
extends
Struct
{
external
Pointer
<
Utf8
>
model
;
@Int32
()
external
int
numThreads
;
@Int32
()
external
int
debug
;
external
Pointer
<
Utf8
>
provider
;
}
final
class
SherpaOnnxKeywordSpotterConfig
extends
Struct
{
external
SherpaOnnxFeatureConfig
feat
;
...
...
@@ -402,10 +450,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
final
class
SherpaOnnxSpeakerEmbeddingManager
extends
Opaque
{}
/// Opaque handle to the native speaker diarization object.
final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {}

/// Opaque handle to a native speaker diarization result.
final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {}

// Each native function has two typedefs: the `...Native` one uses dart:ffi
// native types, the other the corresponding Dart types. When both
// signatures are identical the Dart typedef is an alias of the native one.

typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarization> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);

typedef SherpaOnnxCreateOfflineSpeakerDiarization
    = SherpaOnnxCreateOfflineSpeakerDiarizationNative;

typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative = Void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxDestroyOfflineSpeakerDiarization = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>);
typedef
SherpaOnnxCreateOfflinePunctuationNative
=
Pointer
<
SherpaOnnxOfflinePunctuation
>
Function
(
Pointer
<
SherpaOnnxOfflinePunctuationConfig
>);
// FFI signatures for the speaker diarization functions. Each `...Native`
// typedef uses dart:ffi native types; its counterpart uses Dart types.

typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative = Int32
    Function(Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate = int Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>);

typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative = Void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative = Int32
    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers = int Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative = Int32
    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments = int Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

// Returns a native array of segments; the wrapper releases it with the
// DestroySegment function below.
typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
    = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative;

typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative = Void
    Function(Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);

typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationSegment>);

// Arguments: diarization handle, sample buffer, number of samples.
typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, Int32);

typedef SherpaOnnxOfflineSpeakerDiarizationProcess
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>, Pointer<Float>, int);

// Progress callback without a user-data argument: (processed, total).
typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative = Int32
    Function(Int32, Int32);

typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
        Pointer<Float>,
        Int32,
        Pointer<
            NativeFunction<
                SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);

typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
    = Pointer<SherpaOnnxOfflineSpeakerDiarizationResult> Function(
        Pointer<SherpaOnnxOfflineSpeakerDiarization>,
        Pointer<Float>,
        int,
        Pointer<
            NativeFunction<
                SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>);

typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative = Void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarizationResult>);

typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig = void Function(
    Pointer<SherpaOnnxOfflineSpeakerDiarization>,
    Pointer<SherpaOnnxOfflineSpeakerDiarizationConfig>);
typedef
SherpaOnnxCreateOfflinePunctuation
=
SherpaOnnxCreateOfflinePunctuationNative
;
...
...
@@ -940,6 +1079,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
typedef
SherpaOnnxFreeWave
=
void
Function
(
Pointer
<
SherpaOnnxWave
>);
class
SherpaOnnxBindings
{
static
SherpaOnnxCreateOfflineSpeakerDiarization
?
sherpaOnnxCreateOfflineSpeakerDiarization
;
static
SherpaOnnxDestroyOfflineSpeakerDiarization
?
sherpaOnnxDestroyOfflineSpeakerDiarization
;
static
SherpaOnnxOfflineSpeakerDiarizationGetSampleRate
?
sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
;
static
SherpaOnnxOfflineSpeakerDiarizationSetConfig
?
sherpaOnnxOfflineSpeakerDiarizationSetConfig
;
static
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers
?
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers
;
static
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
?
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
;
static
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
?
sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
;
static
SherpaOnnxOfflineSpeakerDiarizationDestroySegment
?
sherpaOnnxOfflineSpeakerDiarizationDestroySegment
;
static
SherpaOnnxOfflineSpeakerDiarizationProcess
?
sherpaOnnxOfflineSpeakerDiarizationProcess
;
static
SherpaOnnxOfflineSpeakerDiarizationDestroyResult
?
sherpaOnnxOfflineSpeakerDiarizationDestroyResult
;
static
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
?
sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
;
static
SherpaOnnxCreateOfflinePunctuation
?
sherpaOnnxCreateOfflinePunctuation
;
static
SherpaOnnxDestroyOfflinePunctuation
?
sherpaOnnxDestroyOfflinePunctuation
;
...
...
@@ -1107,6 +1269,83 @@ class SherpaOnnxBindings {
static
SherpaOnnxFreeWave
?
freeWave
;
static
void
init
(
DynamicLibrary
dynamicLibrary
)
{
sherpaOnnxCreateOfflineSpeakerDiarization
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxCreateOfflineSpeakerDiarizationNative
>>(
'SherpaOnnxCreateOfflineSpeakerDiarization'
)
.
asFunction
();
sherpaOnnxDestroyOfflineSpeakerDiarization
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxDestroyOfflineSpeakerDiarizationNative
>>(
'SherpaOnnxDestroyOfflineSpeakerDiarization'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationGetSampleRate
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationGetSampleRate'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationSetConfig
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationSetConfigNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationSetConfig'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationDestroySegment
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationDestroySegment'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationProcess
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationProcessNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationProcess'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg'
)
.
asFunction
();
sherpaOnnxOfflineSpeakerDiarizationDestroyResult
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative
>>(
'SherpaOnnxOfflineSpeakerDiarizationDestroyResult'
)
.
asFunction
();
sherpaOnnxCreateOfflinePunctuation
??=
dynamicLibrary
.
lookup
<
NativeFunction
<
SherpaOnnxCreateOfflinePunctuationNative
>>(
'SherpaOnnxCreateOfflinePunctuation'
)
...
...
flutter/sherpa_onnx/pubspec.yaml
查看文件 @
1ed803a
name
:
sherpa_onnx
description
:
>
Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi
with onnxruntime without Internet connection.
Speech recognition, speech synthesis, speaker diarization, and speaker recognition
using next-gen Kaldi with onnxruntime without Internet connection.
repository
:
https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter
...
...
@@ -12,7 +12,7 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/
topics
:
-
speech-recognition
-
speech-synthesis
-
speaker-
identific
ation
-
speaker-
diariz
ation
-
audio-tagging
-
voice-activity-detection
...
...
@@ -41,7 +41,7 @@ dependencies:
sherpa_onnx_linux
:
^1.10.27
# sherpa_onnx_linux:
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows
:
^1.10.27
# sherpa_onnx_windows:
# path: ../sherpa_onnx_windows
...
...
scripts/dart/speaker-diarization-pubspec.yaml
0 → 100644
查看文件 @
1ed803a
name
:
speaker_diarization
description
:
>
This example demonstrates how to use the Dart API for speaker diarization.
version
:
1.0.0
environment
:
sdk
:
"
>=3.0.0
<4.0.0"
dependencies
:
sherpa_onnx
:
path
:
../../flutter/sherpa_onnx
path
:
^1.9.0
dev_dependencies
:
lints
:
^3.0.0
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
1ed803a
...
...
@@ -1828,4 +1828,20 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
return
ans
;
}
// Runs speaker diarization on `samples` (`n` floats) and reports progress
// through a callback that takes no user-data argument.
//
// @param sd        Diarization object to use.
// @param samples   Pointer to the audio samples.
// @param n         Number of samples.
// @param callback  Progress callback. May be NULL, in which case no
//                  progress is reported.
// @return A newly allocated result. The caller has to invoke
//         SherpaOnnxOfflineSpeakerDiarizationDestroyResult() to free it.
const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n,
    SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback) {
  // Adapt the two-argument callback to the three-argument form accepted by
  // Process(); the trailing void* user-data argument is ignored.
  auto wrapper = [callback](int32_t num_processed_chunks,
                            int32_t num_total_chunks, void *) -> int32_t {
    // Calling through a null function pointer would crash, so fall back to
    // 0 (the value the bundled example callback returns) when no callback
    // was supplied.
    return callback ? callback(num_processed_chunks, num_total_chunks) : 0;
  };

  auto ans = new SherpaOnnxOfflineSpeakerDiarizationResult;
  ans->impl = sd->impl->Process(samples, n, wrapper);

  return ans;
}
#endif
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
1ed803a
...
...
@@ -1485,6 +1485,9 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
typedef
int32_t
(
*
SherpaOnnxOfflineSpeakerDiarizationProgressCallback
)(
int32_t
num_processed_chunk
,
int32_t
num_total_chunks
,
void
*
arg
);
// Progress callback variant without the user-data argument. It receives
// the number of processed chunks and the total number of chunks.
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)(
    int32_t num_processed_chunk, int32_t num_total_chunks);
// The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult()
// to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API
const
SherpaOnnxOfflineSpeakerDiarizationResult
*
...
...
@@ -1500,6 +1503,12 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
int32_t
n
,
SherpaOnnxOfflineSpeakerDiarizationProgressCallback
callback
,
void
*
arg
);
// Variant of the process function whose callback takes no user-data
// argument.
//
// The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult()
// to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult *
SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
    const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples,
    int32_t n,
    SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback);
SHERPA_ONNX_API
void
SherpaOnnxOfflineSpeakerDiarizationDestroyResult
(
const
SherpaOnnxOfflineSpeakerDiarizationResult
*
r
);
...
...
sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h
查看文件 @
1ed803a
...
...
@@ -5,6 +5,7 @@
#define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_
#include <algorithm>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
...
...
sherpa-onnx/jni/offline-speaker-diarization.cc
查看文件 @
1ed803a
...
...
@@ -204,7 +204,8 @@ Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback(
jfloat
*
p
=
env
->
GetFloatArrayElements
(
samples
,
nullptr
);
jsize
n
=
env
->
GetArrayLength
(
samples
);
auto
segments
=
sd
->
Process
(
p
,
n
,
callback_wrapper
,
(
void
*
)
arg
).
SortByStartTime
();
sd
->
Process
(
p
,
n
,
callback_wrapper
,
reinterpret_cast
<
void
*>
(
arg
))
.
SortByStartTime
();
env
->
ReleaseFloatArrayElements
(
samples
,
p
,
JNI_ABORT
);
return
ProcessImpl
(
env
,
segments
);
...
...
请
注册
或
登录
后发表评论