#!/usr/bin/env python3
# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
#
# Please run this file on your rk3588 board.

try:
    from rknnlite.api import RKNNLite
except ImportError:
    print("Please run this file on your board (linux + aarch64 + npu)")
    print("You need to install rknn_toolkit_lite2")
    print(
        " from https://github.com/airockchip/rknn-toolkit2/tree/master/rknn-toolkit-lite2/packages"
    )
    print(
        "https://github.com/airockchip/rknn-toolkit2/blob/v2.1.0/rknn-toolkit-lite2/packages/rknn_toolkit_lite2-2.1.0-cp310-cp310-linux_aarch64.whl"
    )
    print("is known to work")
    raise
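
# Installation sketch (assumption; pick the wheel that matches the Python
# version on your board):
#
#   pip install ./rknn_toolkit_lite2-2.1.0-cp310-cp310-linux_aarch64.whl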

import time
from pathlib import Path
from typing import Tuple

import numpy as np
import soundfile as sf


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    samples = np.ascontiguousarray(data)
    return samples, sample_rate
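

# Optional sketch (defined but never called): resample a recording to the
# 16 kHz this script expects. It assumes scipy is available on the board;
# scipy is not imported or required by the rest of this file.
def _resample_to_16k_example(samples: np.ndarray, sample_rate: int) -> np.ndarray:
    from math import gcd

    from scipy.signal import resample_poly  # assumption: scipy is installed

    g = gcd(16000, sample_rate)
    return resample_poly(samples, 16000 // g, sample_rate // g).astype(np.float32)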


def init_model(filename, target_platform="rk3588"):
    if not Path(filename).is_file():
        exit(f"{filename} does not exist")

    rknn_lite = RKNNLite(verbose=False)
    ret = rknn_lite.load_rknn(path=filename)
    if ret != 0:
        exit(f"Load model {filename} failed!")

    # Run on a single NPU core; other core masks may be available in
    # rknn_toolkit_lite2 if you want to use more cores.
    ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
    if ret != 0:
        exit(f"Failed to init rknn runtime for {filename}")

    return rknn_lite


class RKNNModel:
    def __init__(self, model: str, target_platform="rk3588"):
        self.model = init_model(model)

    def release(self):
        self.model.release()

    def __call__(self, x: np.ndarray, h: np.ndarray, c: np.ndarray):
        """
        Args:
          x: (1, 512), np.float32
          h: (2, 1, 64), np.float32
          c: (2, 1, 64), np.float32
        Returns:
          prob: float, speech probability for this window
          next_h: updated h state
          next_c: updated c state
        """
        out, next_h, next_c = self.model.inference(inputs=[x, h, c])
        return out.item(), next_h, next_c
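

# A minimal usage sketch (defined but never called): one streaming step with
# zero initial states, using the shapes documented in __call__ above.
# "./m.rknn" is assumed to be the converted model used by main() below.
def _single_step_example():
    model = RKNNModel(model="./m.rknn")
    x = np.zeros((1, 512), dtype=np.float32)  # one 32 ms window at 16 kHz
    h = np.zeros((2, 1, 64), dtype=np.float32)
    c = np.zeros((2, 1, 64), dtype=np.float32)
    prob, h, c = model(x, h, c)
    print("speech probability for a silent window:", prob)
    model.release()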


def main():
    model = RKNNModel(model="./m.rknn")
    # Run the test once; increase the range to benchmark repeatedly.
    for i in range(1):
        test(model)
    model.release()


def test(model):
    print("started")
    start = time.time()

    samples, sample_rate = load_audio("./lei-jun-test.wav")
    assert sample_rate == 16000, sample_rate

    window_size = 512

    # initial states of the model
    h = np.zeros((2, 1, 64), dtype=np.float32)
    c = np.zeros((2, 1, 64), dtype=np.float32)

    threshold = 0.5
    num_windows = samples.shape[0] // window_size

    out = []
    for i in range(num_windows):
        print(i, num_windows)
        this_samples = samples[i * window_size : (i + 1) * window_size]
        prob, h, c = model(this_samples[None], h, c)
        out.append(prob > threshold)  # True if this window is classified as speech

    print("Elapsed seconds: {:.3f}".format(time.time() - start))

    # 0.25 seconds, expressed as a number of windows
    min_speech_duration = 0.25 * sample_rate / window_size
    min_silence_duration = 0.25 * sample_rate / window_size

    # Collect (start_window, end_window) pairs for speech runs that are long enough
    result = []
    last = -1
    for k, f in enumerate(out):
        if f >= threshold:
            if last == -1:
                last = k
        elif last != -1:
            if k - last > min_speech_duration:
                result.append((last, k))
            last = -1

    if last != -1 and k - last > min_speech_duration:
        result.append((last, k))

    if not result:
        print("Empty for ./lei-jun-test.wav")
        return

    print(result)

    # Merge segments separated by a silence shorter than min_silence_duration
    final = [result[0]]
    for r in result[1:]:
        f = final[-1]
        if r[0] - f[1] < min_silence_duration:
            final[-1] = (f[0], r[1])
        else:
            final.append(r)

    for f in final:
        start = f[0] * window_size / sample_rate
        end = f[1] * window_size / sample_rate
        print("{:.3f} -- {:.3f}".format(start, end))


if __name__ == "__main__":
    main()