OfflineSpeakerDiarization.kt
2.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
/**
 * Settings for the pyannote speaker-segmentation model.
 *
 * The property is mutable (`var`), so an instance can be adjusted after construction.
 *
 * @property model Model location as a string. NOTE(review): presumably a file or
 *   asset path to the pyannote model; confirm against the native loader.
 */
data class OfflineSpeakerSegmentationPyannoteModelConfig(
    var model: String,
)
/**
 * Settings for the speaker-segmentation stage.
 *
 * All properties are mutable (`var`).
 *
 * @property pyannote Configuration of the pyannote segmentation model.
 * @property numThreads Defaults to 1. NOTE(review): presumably the number of threads
 *   the native runtime uses for this model; confirm.
 * @property debug Defaults to `false`. NOTE(review): presumably enables extra native
 *   logging; confirm.
 * @property provider Defaults to `"cpu"`. NOTE(review): presumably names the
 *   execution provider used by the native runtime; confirm the accepted values.
 */
data class OfflineSpeakerSegmentationModelConfig(
    var pyannote: OfflineSpeakerSegmentationPyannoteModelConfig,
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)
/**
 * Settings for the clustering stage of speaker diarization.
 *
 * Both properties are mutable (`var`) and have defaults, so `FastClusteringConfig()`
 * is a valid call.
 *
 * @property numClusters Defaults to -1. NOTE(review): presumably the expected number
 *   of speakers, with a non-positive value meaning "unknown"; confirm against the
 *   native implementation.
 * @property threshold Defaults to 0.5. NOTE(review): presumably the clustering
 *   distance threshold used when [numClusters] is not given; confirm.
 */
data class FastClusteringConfig(
    var numClusters: Int = -1,
    var threshold: Float = 0.5f,
)
/**
 * Top-level configuration passed to [OfflineSpeakerDiarization].
 *
 * All properties are mutable (`var`).
 *
 * @property segmentation Configuration of the speaker-segmentation model.
 * @property embedding Configuration of the speaker-embedding extractor.
 * @property clustering Configuration of the clustering stage.
 * @property minDurationOn Defaults to 0.2. NOTE(review): presumably a minimum
 *   segment duration in seconds; confirm semantics and units.
 * @property minDurationOff Defaults to 0.5. NOTE(review): presumably a minimum gap
 *   duration in seconds; confirm semantics and units.
 */
data class OfflineSpeakerDiarizationConfig(
    var segmentation: OfflineSpeakerSegmentationModelConfig,
    var embedding: SpeakerEmbeddingExtractorConfig,
    var clustering: FastClusteringConfig,
    var minDurationOn: Float = 0.2f,
    var minDurationOff: Float = 0.5f,
)
/**
 * One diarization result: a time span attributed to a single speaker.
 *
 * @property start Start of the segment, in seconds.
 * @property end End of the segment, in seconds.
 * @property speaker ID of the speaker; IDs count from 0.
 */
data class OfflineSpeakerDiarizationSegment(
    val start: Float,
    val end: Float,
    val speaker: Int,
)
/**
 * Kotlin wrapper around the native (JNI) offline speaker-diarization engine.
 *
 * A native object is created in the constructor and referenced through an opaque
 * handle. Call [release] once the instance is no longer needed; after that, every
 * method that needs the native object throws [IllegalStateException] instead of
 * passing a zero handle to native code.
 *
 * @param assetManager When non-null, the native object is created through
 *   `newFromAsset` with this asset manager; when null, through `newFromFile`.
 * @param config Configuration handed to the native constructor.
 */
class OfflineSpeakerDiarization(
    assetManager: AssetManager? = null,
    config: OfflineSpeakerDiarizationConfig,
) {
    // Opaque handle returned by the native constructor. 0 means "no native object":
    // either it has been released or the native side returned 0.
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    /**
     * Frees the native object if it is still alive and resets the handle to 0,
     * so calling this more than once is harmless.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Explicitly frees the native object; does the same work as [finalize]. */
    fun release() = finalize()

    /**
     * Updates the configuration of the native object.
     *
     * Only `config.clustering` is used. All other fields in [config] are ignored.
     *
     * @throws IllegalStateException if the native handle is 0.
     */
    fun setConfig(config: OfflineSpeakerDiarizationConfig) {
        setConfig(requireHandle(), config)
    }

    /**
     * Returns the sample rate reported by the native object.
     *
     * @throws IllegalStateException if the native handle is 0.
     */
    fun sampleRate(): Int = getSampleRate(requireHandle())

    /**
     * Runs diarization over [samples] and returns the detected segments.
     *
     * NOTE(review): [samples] presumably must be mono audio at [sampleRate] Hz,
     * normalized to [-1, 1]; confirm against the native implementation.
     *
     * @throws IllegalStateException if the native handle is 0.
     */
    fun process(samples: FloatArray): Array<OfflineSpeakerDiarizationSegment> =
        process(requireHandle(), samples)

    /**
     * Same as [process], but additionally hands [callback] and [arg] to native code.
     *
     * @param callback Receives `(numProcessedChunks, numTotalChunks, arg)` and
     *   returns an `Int`. NOTE(review): the meaning of the return value is defined
     *   by the native side; confirm before relying on it.
     * @param arg Opaque value forwarded to native code; defaults to 0.
     * @throws IllegalStateException if the native handle is 0.
     */
    fun processWithCallback(
        samples: FloatArray,
        callback: (numProcessedChunks: Int, numTotalChunks: Int, arg: Long) -> Int,
        arg: Long = 0,
    ): Array<OfflineSpeakerDiarizationSegment> =
        processWithCallback(requireHandle(), samples, callback, arg)

    // Returns the native handle, failing fast when it is 0 so that a released or
    // never-created object is not passed to native code.
    private fun requireHandle(): Long {
        check(ptr != 0L) {
            "OfflineSpeakerDiarization: native handle is 0 (already released or creation failed)"
        }
        return ptr
    }

    // Native entry points. Their names and signatures must stay in sync with the
    // symbols exported by the JNI library.
    private external fun delete(ptr: Long)

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflineSpeakerDiarizationConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineSpeakerDiarizationConfig,
    ): Long

    private external fun setConfig(ptr: Long, config: OfflineSpeakerDiarizationConfig)

    private external fun getSampleRate(ptr: Long): Int

    private external fun process(ptr: Long, samples: FloatArray): Array<OfflineSpeakerDiarizationSegment>

    private external fun processWithCallback(
        ptr: Long,
        samples: FloatArray,
        callback: (numProcessedChunks: Int, numTotalChunks: Int, arg: Long) -> Int,
        arg: Long,
    ): Array<OfflineSpeakerDiarizationSegment>

    companion object {
        init {
            // Load the JNI library that provides the external functions above.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}