Vad.ets
3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import {
circularBufferGet,
circularBufferHead,
circularBufferPop,
circularBufferPush,
circularBufferReset,
circularBufferSize,
createCircularBuffer,
createVoiceActivityDetector,
voiceActivityDetectorAcceptWaveform,
voiceActivityDetectorClear,
voiceActivityDetectorFlush,
voiceActivityDetectorFront,
voiceActivityDetectorIsDetected,
voiceActivityDetectorIsEmpty,
voiceActivityDetectorPop,
voiceActivityDetectorReset,
} from 'libsherpa_onnx.so';
/**
 * Settings bundle for the Silero VAD model.
 *
 * The constructor stores its five arguments verbatim; nothing is validated or
 * converted here.
 * NOTE(review): the exact meaning and units of each field are defined by the
 * native library that consumes this object — confirm against its documentation.
 */
export class SileroVadConfig {
  /** Model identifier (presumably a path to the model file — TODO confirm). */
  public model: string;
  /** Detection threshold (presumably a probability cut-off — TODO confirm). */
  public threshold: number;
  /** Minimum speech duration (unit not visible here — TODO confirm). */
  public minSpeechDuration: number;
  /** Minimum silence duration (unit not visible here — TODO confirm). */
  public minSilenceDuration: number;
  /** Analysis window size (presumably in samples — TODO confirm). */
  public windowSize: number;

  /**
   * @param model stored as `this.model`
   * @param threshold stored as `this.threshold`
   * @param minSpeechDuration stored as `this.minSpeechDuration`
   * @param minSilenceDuration stored as `this.minSilenceDuration`
   * @param windowSize stored as `this.windowSize`
   */
  public constructor(
    model: string,
    threshold: number,
    minSpeechDuration: number,
    minSilenceDuration: number,
    windowSize: number
  ) {
    // Explicit field-by-field copies, kept in declaration order.
    this.model = model;
    this.threshold = threshold;
    this.minSpeechDuration = minSpeechDuration;
    this.minSilenceDuration = minSilenceDuration;
    this.windowSize = windowSize;
  }
}
/**
 * Top-level configuration object handed to the voice activity detector.
 *
 * The constructor stores its four arguments verbatim; nothing is validated or
 * converted here.
 */
export class VadConfig {
  /** Nested Silero-specific settings. */
  public sileroVad: SileroVadConfig;
  /** Sample rate of the audio (presumably in Hz — TODO confirm). */
  public sampleRate: number;
  /** Debug flag forwarded as-is (effect is decided by the consumer — TODO confirm). */
  public debug: boolean;
  /** Thread count forwarded as-is (presumably for model inference — TODO confirm). */
  public numThreads: number;

  /**
   * @param sileroVad stored as `this.sileroVad` (same reference, not copied)
   * @param sampleRate stored as `this.sampleRate`
   * @param debug stored as `this.debug`
   * @param numThreads stored as `this.numThreads`
   */
  public constructor(
    sileroVad: SileroVadConfig,
    sampleRate: number,
    debug: boolean,
    numThreads: number
  ) {
    // Explicit field-by-field copies, kept in declaration order.
    this.sileroVad = sileroVad;
    this.sampleRate = sampleRate;
    this.debug = debug;
    this.numThreads = numThreads;
  }
}
/**
 * Thin wrapper around the native circular buffer exposed by
 * `libsherpa_onnx.so`.
 *
 * Every method forwards to the matching `circularBuffer*` binding together
 * with the opaque handle obtained from `createCircularBuffer`.
 */
export class CircularBuffer {
  // Opaque native handle; only ever passed back to the native bindings.
  private handle: object;

  /**
   * @param capacity forwarded to `createCircularBuffer`
   *   (presumably the maximum number of samples — TODO confirm).
   */
  constructor(capacity: number) {
    this.handle = createCircularBuffer(capacity);
  }

  /**
   * Hands `samples` to the native buffer.
   *
   * @param samples float32 samples to push
   */
  push(samples: Float32Array): void {
    // Deliberately no logging here: interpolating a Float32Array into a
    // string formats every element, which is costly and floods the log when
    // this is called for each audio chunk.
    circularBufferPush(this.handle, samples);
  }

  /**
   * Fetches samples from the native buffer.
   *
   * @param startIndex forwarded as the start position
   * @param n forwarded as the number of samples requested
   * @param enableExternalBuffer forwarded unchanged; defaults to `true`
   *   (presumably selects whether the result aliases native memory — TODO confirm)
   * @returns a float32 array produced by the native side
   */
  get(startIndex: number, n: number, enableExternalBuffer: boolean = true): Float32Array {
    return circularBufferGet(this.handle, startIndex, n, enableExternalBuffer);
  }

  /**
   * Forwards to `circularBufferPop`.
   *
   * @param n forwarded count (presumably the number of samples to drop — TODO confirm)
   */
  pop(n: number): void {
    circularBufferPop(this.handle, n);
  }

  /** @returns the value reported by `circularBufferSize`. */
  size(): number {
    return circularBufferSize(this.handle);
  }

  /** @returns the value reported by `circularBufferHead`. */
  head(): number {
    return circularBufferHead(this.handle);
  }

  /** Forwards to `circularBufferReset`. */
  reset(): void {
    circularBufferReset(this.handle);
  }
}
// Shape of the value returned by Vad.front().
export interface SpeechSegment {
// Audio samples of the segment as a float32 array.
samples: Float32Array;
// Start position of the segment.
// NOTE(review): presumably a sample index into the input stream — confirm
// against the native library.
start: number;
}
/**
 * Wrapper around the native voice activity detector exposed by
 * `libsherpa_onnx.so`.
 *
 * Each method forwards to the matching `voiceActivityDetector*` binding
 * together with the opaque handle created in the constructor.
 */
export class Vad {
  /** The configuration this detector was created with. */
  public config: VadConfig;
  // Opaque native handle; only ever passed back to the native bindings.
  private handle: object;

  /**
   * @param config detector configuration; kept on `this.config` and forwarded
   *   to `createVoiceActivityDetector`
   * @param bufferSizeInSeconds optional; forwarded unchanged (may be `undefined`)
   * @param mgr optional; forwarded unchanged
   *   (presumably a resource manager for loading the model — TODO confirm)
   */
  constructor(config: VadConfig, bufferSizeInSeconds?: number, mgr?: object) {
    this.config = config;
    this.handle = createVoiceActivityDetector(config, bufferSizeInSeconds, mgr);
  }

  /**
   * Feeds audio to the detector.
   *
   * @param samples float32 samples forwarded to the native side
   */
  acceptWaveform(samples: Float32Array): void {
    voiceActivityDetectorAcceptWaveform(this.handle, samples);
  }

  /**
   * @returns the result of `voiceActivityDetectorIsEmpty`
   *   (presumably `true` when no segment is queued — TODO confirm).
   */
  isEmpty(): boolean {
    return voiceActivityDetectorIsEmpty(this.handle);
  }

  /**
   * @returns the result of `voiceActivityDetectorIsDetected`
   *   (presumably `true` while speech is being detected — TODO confirm).
   */
  isDetected(): boolean {
    return voiceActivityDetectorIsDetected(this.handle);
  }

  /** Forwards to `voiceActivityDetectorPop`. */
  pop(): void {
    voiceActivityDetectorPop(this.handle);
  }

  /** Forwards to `voiceActivityDetectorClear`. */
  clear(): void {
    voiceActivityDetectorClear(this.handle);
  }

  /**
   * Retrieves a segment via `voiceActivityDetectorFront`.
   *
   * @param enableExternalBuffer forwarded unchanged; defaults to `true`
   * @returns the segment reported by the native side
   */
  front(enableExternalBuffer = true): SpeechSegment {
    const segment: SpeechSegment = voiceActivityDetectorFront(this.handle, enableExternalBuffer);
    return segment;
  }

  /** Forwards to `voiceActivityDetectorReset`. */
  reset(): void {
    voiceActivityDetectorReset(this.handle);
  }

  /** Forwards to `voiceActivityDetectorFlush`. */
  flush(): void {
    voiceActivityDetectorFlush(this.handle);
  }
}