Speaker.kt
4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
import android.util.Log
/**
 * Settings handed to the native code when a [SpeakerEmbeddingExtractor] is created.
 *
 * @property model Name or path of the model file. NOTE(review): presumably resolved
 *   against the app's assets when an AssetManager is supplied and against the file
 *   system otherwise -- confirm with the native side.
 * @property numThreads Thread count forwarded to the native code; defaults to 1.
 * @property debug Debug flag forwarded to the native code; defaults to `false`.
 * @property provider Provider name forwarded to the native code; defaults to `"cpu"`.
 */
data class SpeakerEmbeddingExtractorConfig(
    val model: String,
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)
/**
 * Kotlin wrapper around a native speaker embedding extractor reached through JNI.
 *
 * The native object is created in the constructor: through `newFromAsset` when an
 * [AssetManager] is supplied, through `newFromFile` otherwise. Its handle is kept
 * privately and destroyed by [release] (also reachable through `finalize`).
 *
 * Once the handle has been cleared, every operation that needs the native object
 * throws [IllegalStateException] instead of passing a zero handle to native code.
 *
 * @param assetManager When non-null, the native object is created from app assets.
 * @param config Settings forwarded to the native constructor.
 */
class SpeakerEmbeddingExtractor(
    assetManager: AssetManager? = null,
    config: SpeakerEmbeddingExtractorConfig,
) {
    // Handle of the native object; reset to 0 after the native object is deleted.
    private var ptr: Long =
        if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }

    /** Deletes the native object if it is still alive; safe to call repeatedly. */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Explicitly frees the native object. The instance must not be used afterwards. */
    fun release() = finalize()

    /**
     * Creates a new native stream and wraps its handle in an [OnlineStream].
     *
     * @throws IllegalStateException if the native object has already been released.
     */
    fun createStream(): OnlineStream {
        val streamHandle = createStream(liveHandle())
        return OnlineStream(streamHandle)
    }

    /**
     * Forwards to the native `isReady` for [stream].
     *
     * @throws IllegalStateException if the native object has already been released.
     */
    fun isReady(stream: OnlineStream): Boolean = isReady(liveHandle(), stream.ptr)

    /**
     * Forwards to the native `compute` for [stream] and returns its float array
     * (presumably the speaker embedding -- confirm against the native side).
     *
     * @throws IllegalStateException if the native object has already been released.
     */
    fun compute(stream: OnlineStream): FloatArray = compute(liveHandle(), stream.ptr)

    /**
     * Returns the value reported by the native `dim`.
     *
     * @throws IllegalStateException if the native object has already been released.
     */
    fun dim(): Int = dim(liveHandle())

    // Returns the native handle, refusing to hand out a cleared (zero) one so that a
    // use-after-release surfaces as an exception instead of a native crash.
    private fun liveHandle(): Long {
        check(ptr != 0L) { "SpeakerEmbeddingExtractor has no native object (already released?)" }
        return ptr
    }

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun newFromFile(
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun isReady(ptr: Long, streamPtr: Long): Boolean

    private external fun compute(ptr: Long, streamPtr: Long): FloatArray

    private external fun dim(ptr: Long): Int

    companion object {
        init {
            // The native methods above are implemented in this shared library.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
/**
 * Kotlin wrapper around a native speaker embedding manager reached through JNI.
 *
 * The native object is created from [dim] in the constructor and destroyed by
 * [release] (also reachable through `finalize`). Once the handle has been cleared,
 * every operation throws [IllegalStateException] instead of passing a zero handle
 * to native code.
 *
 * @property dim Value handed to the native `create` call (presumably the embedding
 *   dimension the manager works with -- confirm against the native side).
 */
class SpeakerEmbeddingManager(val dim: Int) {
    // Handle of the native object; reset to 0 after the native object is deleted.
    private var ptr: Long = create(dim)

    /** Deletes the native object if it is still alive; safe to call repeatedly. */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Explicitly frees the native object. The instance must not be used afterwards. */
    fun release() = finalize()

    /** Forwards a single [embedding] for [name] to the native `add`; returns its result. */
    fun add(name: String, embedding: FloatArray): Boolean = add(liveHandle(), name, embedding)

    /** Forwards several embeddings for [name] to the native `addList`; returns its result. */
    fun add(name: String, embedding: Array<FloatArray>): Boolean =
        addList(liveHandle(), name, embedding)

    /** Forwards [name] to the native `remove`; returns its result. */
    fun remove(name: String): Boolean = remove(liveHandle(), name)

    /**
     * Forwards [embedding] and [threshold] to the native `search` and returns its
     * string result (presumably the matched speaker name -- confirm what is returned
     * when nothing matches).
     */
    fun search(embedding: FloatArray, threshold: Float): String =
        search(liveHandle(), embedding, threshold)

    /** Forwards [name], [embedding] and [threshold] to the native `verify`; returns its result. */
    fun verify(name: String, embedding: FloatArray, threshold: Float): Boolean =
        verify(liveHandle(), name, embedding, threshold)

    /** Forwards [name] to the native `contains`; returns its result. */
    fun contains(name: String): Boolean = contains(liveHandle(), name)

    /** Returns the value reported by the native `numSpeakers`. */
    fun numSpeakers(): Int = numSpeakers(liveHandle())

    /** Returns the array reported by the native `allSpeakerNames`. */
    fun allSpeakerNames(): Array<String> = allSpeakerNames(liveHandle())

    // Returns the native handle, refusing to hand out a cleared (zero) one so that a
    // use-after-release surfaces as an exception instead of a native crash.
    private fun liveHandle(): Long {
        check(ptr != 0L) { "SpeakerEmbeddingManager has no native object (already released?)" }
        return ptr
    }

    private external fun create(dim: Int): Long

    private external fun delete(ptr: Long)

    private external fun add(ptr: Long, name: String, embedding: FloatArray): Boolean

    private external fun addList(ptr: Long, name: String, embedding: Array<FloatArray>): Boolean

    private external fun remove(ptr: Long, name: String): Boolean

    private external fun search(ptr: Long, embedding: FloatArray, threshold: Float): String

    private external fun verify(
        ptr: Long,
        name: String,
        embedding: FloatArray,
        threshold: Float,
    ): Boolean

    private external fun contains(ptr: Long, name: String): Boolean

    private external fun numSpeakers(ptr: Long): Int

    private external fun allSpeakerNames(ptr: Long): Array<String>

    companion object {
        init {
            // The native methods above are implemented in this shared library.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
// File name of the speaker embedding model; SpeakerRecognition.initExtractor passes
// it as `model` in the extractor config.
//
// Download the model file from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
// and put it directly inside the assets directory, not in a subdirectory of assets.
//
// NOTE(review): the tag in the URL above is spelled "recongition"; it is kept verbatim
// because it presumably matches the real release tag -- verify before "fixing" it.
private val modelName = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
/**
 * Process-wide holder for one [SpeakerEmbeddingExtractor] and the
 * [SpeakerEmbeddingManager] that goes with it.
 *
 * Call [initExtractor] once before reading [extractor] or [manager].
 */
object SpeakerRecognition {
    // Backing fields for [extractor] and [manager]; both are assigned by initExtractor.
    var _extractor: SpeakerEmbeddingExtractor? = null
    var _manager: SpeakerEmbeddingManager? = null

    /** The shared extractor; throws NullPointerException if `_extractor` is still null. */
    val extractor: SpeakerEmbeddingExtractor
        get() = _extractor!!

    /** The shared manager; throws NullPointerException if `_manager` is still null. */
    val manager: SpeakerEmbeddingManager
        get() = _manager!!

    /**
     * Creates the shared extractor and a manager sized with the extractor's `dim()`.
     *
     * Does nothing when `_extractor` is already set. Both objects are published only
     * after both have been constructed, so a failure while creating the manager does
     * not leave the extractor set with no manager (which would make every later call
     * return early and the manager impossible to initialise).
     *
     * @param assetManager Forwarded to the [SpeakerEmbeddingExtractor] constructor.
     */
    fun initExtractor(assetManager: AssetManager? = null) {
        synchronized(this) {
            if (_extractor != null) {
                return
            }

            Log.i("sherpa-onnx", "Initializing speaker embedding extractor")

            val newExtractor = SpeakerEmbeddingExtractor(
                assetManager = assetManager,
                config = SpeakerEmbeddingExtractorConfig(
                    model = modelName,
                    numThreads = 2,
                    debug = false,
                    provider = "cpu",
                ),
            )

            val newManager = try {
                SpeakerEmbeddingManager(dim = newExtractor.dim())
            } catch (t: Throwable) {
                // Free the native extractor right away instead of waiting for its
                // finalizer, then let the original failure propagate unchanged.
                newExtractor.release()
                throw t
            }

            _extractor = newExtractor
            _manager = newManager
        }
    }
}