Vad.kt
2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class SileroVadModelConfig(
var model: String,
var threshold: Float = 0.5F,
var minSilenceDuration: Float = 0.25F,
var minSpeechDuration: Float = 0.25F,
var windowSize: Int = 512,
var maxSpeechDuration: Float = 5.0F,
)
data class VadModelConfig(
var sileroVadModelConfig: SileroVadModelConfig,
var sampleRate: Int = 16000,
var numThreads: Int = 1,
var provider: String = "cpu",
var debug: Boolean = false,
)
class SpeechSegment(val start: Int, val samples: FloatArray)
class Vad(
assetManager: AssetManager? = null,
var config: VadModelConfig,
) {
private var ptr: Long
init {
if (assetManager != null) {
ptr = newFromAsset(assetManager, config)
} else {
ptr = newFromFile(config)
}
}
protected fun finalize() {
if (ptr != 0L) {
delete(ptr)
ptr = 0
}
}
fun release() = finalize()
fun acceptWaveform(samples: FloatArray) = acceptWaveform(ptr, samples)
fun empty(): Boolean = empty(ptr)
fun pop() = pop(ptr)
fun front(): SpeechSegment {
val segment = front(ptr)
return SpeechSegment(segment[0] as Int, segment[1] as FloatArray)
}
fun clear() = clear(ptr)
fun isSpeechDetected(): Boolean = isSpeechDetected(ptr)
fun reset() = reset(ptr)
fun flush() = flush(ptr)
private external fun delete(ptr: Long)
private external fun newFromAsset(
assetManager: AssetManager,
config: VadModelConfig,
): Long
private external fun newFromFile(
config: VadModelConfig,
): Long
private external fun acceptWaveform(ptr: Long, samples: FloatArray)
private external fun empty(ptr: Long): Boolean
private external fun pop(ptr: Long)
private external fun clear(ptr: Long)
private external fun front(ptr: Long): Array<Any>
private external fun isSpeechDetected(ptr: Long): Boolean
private external fun reset(ptr: Long)
private external fun flush(ptr: Long)
companion object {
init {
System.loadLibrary("sherpa-onnx-jni")
}
}
}
// Please visit
// https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx
// to download silero_vad.onnx
// and put it inside the assets/
// directory
fun getVadModelConfig(type: Int): VadModelConfig? {
when (type) {
0 -> {
return VadModelConfig(
sileroVadModelConfig = SileroVadModelConfig(
model = "silero_vad.onnx",
threshold = 0.5F,
minSilenceDuration = 0.25F,
minSpeechDuration = 0.25F,
windowSize = 512,
),
sampleRate = 16000,
numThreads = 1,
provider = "cpu",
)
}
}
return null;
}