StreamingAsr.ets
3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import {
acceptWaveformOnline,
createOnlineRecognizer,
createOnlineStream,
decodeOnlineStream,
getOnlineStreamResultAsJson,
inputFinished,
isEndpoint,
isOnlineStreamReady,
reset,
} from 'libsherpa_onnx.so';
import { FeatureConfig, Samples } from './NonStreamingAsr';
/**
 * Wrapper around a native online (streaming) stream handle.
 *
 * Every method forwards to the matching function imported from
 * 'libsherpa_onnx.so', passing the stored handle as the first argument.
 */
export class OnlineStream {
  /** Opaque native handle; handed back unchanged to the native functions. */
  handle: object;

  /**
   * @param handle Native stream handle to wrap.
   */
  constructor(handle: object) {
    this.handle = handle;
  }

  /**
   * Passes a chunk of audio to the native stream.
   *
   * obj is {samples: samples, sampleRate: sampleRate}
   *  - samples is a float32 array containing samples in the range [-1, 1]
   *  - sampleRate is a number
   *
   * @param obj Audio chunk forwarded as-is to acceptWaveformOnline.
   */
  acceptWaveform(obj: Samples): void {
    const nativeStream: object = this.handle;
    acceptWaveformOnline(nativeStream, obj);
  }

  /**
   * Calls the native inputFinished function for this stream.
   */
  inputFinished(): void {
    const nativeStream: object = this.handle;
    inputFinished(nativeStream);
  }
}
/**
 * Configuration for a streaming transducer model.
 *
 * All three fields default to the empty string.
 * NOTE(review): presumably these are model file paths -- confirm against
 * the native binding.
 */
export class OnlineTransducerModelConfig {
  /** Encoder setting; defaults to ''. */
  encoder: string = '';

  /** Decoder setting; defaults to ''. */
  decoder: string = '';

  /** Joiner setting; defaults to ''. */
  joiner: string = '';
}
/**
 * Configuration for a streaming paraformer model.
 *
 * Both fields default to the empty string.
 * NOTE(review): presumably these are model file paths -- confirm against
 * the native binding.
 */
export class OnlineParaformerModelConfig {
  /** Encoder setting; defaults to ''. */
  encoder: string = '';

  /** Decoder setting; defaults to ''. */
  decoder: string = '';
}
/**
 * Configuration for a streaming zipformer2 CTC model.
 *
 * NOTE(review): presumably `model` is a model file path -- confirm against
 * the native binding.
 */
export class OnlineZipformer2CtcModelConfig {
  /** Model setting; defaults to ''. */
  model: string = '';
}
/**
 * Model-level configuration for the streaming recognizer.
 *
 * Holds one sub-config per supported model family plus settings shared by
 * all of them. Every field has a default, so `new OnlineModelConfig()` is
 * immediately usable as a starting point.
 *
 * NOTE(review): how the native side chooses between the sub-configs is not
 * visible in this file -- confirm against the native binding.
 */
export class OnlineModelConfig {
  /** Transducer sub-config; a fresh default instance per object. */
  transducer: OnlineTransducerModelConfig = new OnlineTransducerModelConfig();

  /** Paraformer sub-config; a fresh default instance per object. */
  paraformer: OnlineParaformerModelConfig = new OnlineParaformerModelConfig();

  /** Zipformer2 CTC sub-config; a fresh default instance per object. */
  zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig();

  /** Tokens setting; defaults to ''. */
  tokens: string = '';

  /** Thread count; defaults to 1. */
  numThreads: number = 1;

  /** Execution provider name; defaults to 'cpu'. */
  provider: string = 'cpu';

  /** Debug flag; defaults to false. */
  debug: boolean = false;

  /** Model type; defaults to ''. */
  modelType: string = '';

  /** Modeling unit; defaults to 'cjkchar'. */
  modelingUnit: string = 'cjkchar';

  /** BPE vocabulary setting; defaults to ''. */
  bpeVocab: string = '';
}
/**
 * Configuration for the CTC FST decoder.
 */
export class OnlineCtcFstDecoderConfig {
  /** Graph setting; defaults to ''. */
  graph: string = '';

  /** Maximum active value; defaults to 3000. */
  maxActive: number = 3000;
}
/**
 * Top-level configuration for a streaming recognizer.
 *
 * Every field has a default, so callers only need to overwrite the settings
 * they care about.
 */
export class OnlineRecognizerConfig {
  /** Feature-extraction config; a fresh default instance per object. */
  featConfig: FeatureConfig = new FeatureConfig();

  /** Model config; a fresh default instance per object. */
  modelConfig: OnlineModelConfig = new OnlineModelConfig();

  /** Decoding method name; defaults to 'greedy_search'. */
  decodingMethod: string = 'greedy_search';

  /** Maximum active paths; defaults to 4. */
  maxActivePaths: number = 4;

  /** Whether endpoint detection is enabled; defaults to false. */
  enableEndpoint: boolean = false;

  // NOTE(review): the three rule values below are presumably expressed in
  // seconds -- confirm against the sherpa-onnx endpointing documentation.

  /** Endpoint rule 1 minimum trailing silence; defaults to 2.4. */
  rule1MinTrailingSilence: number = 2.4;

  /** Endpoint rule 2 minimum trailing silence; defaults to 1.2. */
  rule2MinTrailingSilence: number = 1.2;

  /** Endpoint rule 3 minimum utterance length; defaults to 20. */
  rule3MinUtteranceLength: number = 20;

  /** Hotwords file setting; defaults to ''. */
  hotwordsFile: string = '';

  /** Hotwords score; defaults to 1.5. */
  hotwordsScore: number = 1.5;

  /** CTC FST decoder config; a fresh default instance per object. */
  ctcFstDecoderConfig: OnlineCtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();

  /** Rule FSTs setting; defaults to ''. */
  ruleFsts: string = '';

  /** Rule FARs setting; defaults to ''. */
  ruleFars: string = '';

  /** Blank penalty; defaults to 0. */
  blankPenalty: number = 0;
}
/**
 * Shape assumed for a parsed recognition-result JSON string.
 *
 * This is a compile-time description only; nothing checks the parsed
 * object against it at runtime.
 */
interface OnlineRecognizerResultJson {
  /** Recognized text. */
  text: string;

  /** Recognized tokens. */
  tokens: string[];

  /** Numeric timestamps. */
  timestamps: number[];
}
/**
 * A recognition result: the text, tokens and timestamps, plus the JSON
 * string form of the result.
 *
 * A freshly constructed instance is empty: '' for the strings and a new
 * empty array for each list.
 */
export class OnlineRecognizerResult {
  /** Recognized text; defaults to ''. */
  text: string = '';

  /** Recognized tokens; defaults to a new empty array. */
  tokens: string[] = [];

  /** Numeric timestamps; defaults to a new empty array. */
  timestamps: number[] = [];

  /** JSON string form of the result; defaults to ''. */
  json: string = '';
}
/**
 * Wrapper around a native online (streaming) recognizer.
 *
 * Each method forwards to the matching function imported from
 * 'libsherpa_onnx.so', passing the recognizer handle and, where relevant,
 * the handle of the given stream.
 */
export class OnlineRecognizer {
  /** Opaque native recognizer handle returned by createOnlineRecognizer. */
  handle: object;

  /** The configuration object this recognizer was constructed with. */
  config: OnlineRecognizerConfig;

  /**
   * @param config Recognizer configuration, forwarded to the native factory
   *               and kept on the instance.
   * @param mgr    Optional object forwarded unchanged to the native factory.
   *               NOTE(review): presumably a resource manager for reading
   *               bundled model files -- confirm against the native binding.
   */
  constructor(config: OnlineRecognizerConfig, mgr?: object) {
    this.handle = createOnlineRecognizer(config, mgr);
    this.config = config;
  }

  /**
   * Creates a native stream for this recognizer and wraps it.
   *
   * @returns A new OnlineStream holding the native stream handle.
   */
  createStream(): OnlineStream {
    const streamHandle: object = createOnlineStream(this.handle);
    const stream = new OnlineStream(streamHandle);
    return stream;
  }

  /**
   * @param stream Stream to query.
   * @returns Whatever the native isOnlineStreamReady reports for the stream.
   */
  isReady(stream: OnlineStream): boolean {
    const ready: boolean = isOnlineStreamReady(this.handle, stream.handle);
    return ready;
  }

  /**
   * Invokes the native decodeOnlineStream for the given stream.
   *
   * @param stream Stream to decode.
   */
  decode(stream: OnlineStream): void {
    decodeOnlineStream(this.handle, stream.handle);
  }

  /**
   * @param stream Stream to query.
   * @returns Whatever the native isEndpoint reports for the stream.
   */
  isEndpoint(stream: OnlineStream): boolean {
    const atEndpoint: boolean = isEndpoint(this.handle, stream.handle);
    return atEndpoint;
  }

  /**
   * Invokes the native reset for the given stream.
   *
   * @param stream Stream to reset.
   */
  reset(stream: OnlineStream): void {
    reset(this.handle, stream.handle);
  }

  /**
   * Fetches the current result for a stream.
   *
   * The native layer returns a JSON string; it is parsed and its `text`,
   * `tokens` and `timestamps` members are copied onto a new result object,
   * which also keeps the unparsed string in `json`. The parsed value is not
   * validated, so a member missing from the JSON ends up `undefined` on the
   * result.
   *
   * @param stream Stream whose result is requested.
   * @returns A newly created OnlineRecognizerResult.
   */
  getResult(stream: OnlineStream): OnlineRecognizerResult {
    const rawJson: string = getOnlineStreamResultAsJson(this.handle, stream.handle);
    const parsed = JSON.parse(rawJson) as OnlineRecognizerResultJson;

    const result = new OnlineRecognizerResult();
    result.text = parsed.text;
    result.tokens = parsed.tokens;
    result.timestamps = parsed.timestamps;
    result.json = rawJson;
    return result;
  }
}