// test-online-zipformer2-ctc-hlg.js
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
/**
 * Creates the streaming recognizer used by this example.
 *
 * The configuration points at the int8 streaming zipformer2 CTC model and its
 * token table, requests 16000 Hz / 80-dim features, enables endpointing, and
 * supplies an HLG.fst graph through `ctcFstDecoderConfig`.
 *
 * @returns {*} Whatever `sherpa_onnx.createOnlineRecognizer` returns for this
 *     configuration.
 */
function createOnlineRecognizer() {
  return sherpa_onnx.createOnlineRecognizer({
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      zipformer2Ctc: {
        model:
            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      tokens:
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
      numThreads: 1,
      provider: 'cpu',
      debug: 0,
      modelType: '',
    },
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    ctcFstDecoderConfig: {
      graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
      maxActive: 3000,
    },
  });
}
// Create the recognizer and the single stream that all audio from the wave
// file is fed into.
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
// Wave file to decode.
// NOTE(review): the file name suggests 8 kHz audio; the rate actually used is
// the one reported by the reader's 'format' event - confirm against the file.
const waveFilename =
'./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';
// The wav reader is the target the file stream is piped into; it is wrapped in
// a Readable so its output can be pulled with read() in the 'readable' handler.
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
/**
 * Feeds a batch of samples to the stream, runs the recognizer for as long as
 * it reports the stream as ready, then prints the current result text.
 *
 * @param {Float32Array} samples Audio samples, passed to
 *     `stream.acceptWaveform` together with the module-level `gSampleRate`.
 */
function decode(samples) {
  stream.acceptWaveform(gSampleRate, samples);

  // Keep decoding until the recognizer says the stream is no longer ready.
  for (let pending = recognizer.isReady(stream); pending;
       pending = recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const {text} = recognizer.getResult(stream);
  console.log(text);
}
// Sample rate handed to the stream together with every batch of samples.
// Starts at 16000 and is replaced by the rate from the wave header as soon as
// the reader emits 'format'.
let gSampleRate = 16000;

// Record the file's sample rate and reject anything that is not mono,
// 16-bit audio with format code 1 (the only layout the sample conversion in
// this script handles).
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  gSampleRate = sampleRate;

  if (audioFormat !== 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels !== 1) {
    // Report the destructured `channels` value; the previous message referred
    // to an undeclared name and raised a ReferenceError instead of this error.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth !== 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});
// Stream the wave file into the wav reader in 4096-byte reads. pipe() returns
// its destination, so the 'finish' listener below is attached to `reader` and
// runs after the last of the file data has been written to it.
fs.createReadStream(waveFilename, {highWaterMark: 4096})
    .pipe(reader)
    .on('finish', () => {
      // Tail padding: a zero-filled buffer whose length is half of the
      // configured feature sample rate, decoded like any other batch.
      // NOTE(review): decode() labels these samples with gSampleRate, so the
      // padding duration is 0.5 s only when the file rate equals the feature
      // rate - confirm this is intended.
      const paddingLength = recognizer.config.featConfig.sampleRate * 0.5;
      decode(new Float32Array(paddingLength));

      // Release the stream and recognizer once the final result is printed.
      stream.free();
      recognizer.free();
    });
// Pull every available chunk from the wav reader, convert its 16-bit samples
// to floats by dividing by 32768, and decode each converted chunk.
readable.on('readable', function() {
  const bytesPerSample = Int16Array.BYTES_PER_ELEMENT;
  let chunk;
  while ((chunk = readable.read()) != null) {
    // Read through a DataView instead of an Int16Array view over the chunk:
    // a typed-array view throws a RangeError when chunk.byteOffset is odd and
    // uses the platform byte order, whereas getInt16(..., true) accepts any
    // offset and always reads little-endian.
    const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
    // A trailing odd byte (if any) is ignored, matching the truncation the
    // Int16Array length argument applied before.
    const numSamples = Math.floor(chunk.byteLength / bytesPerSample);
    const floatSamples = new Float32Array(numSamples);
    for (let i = 0; i < numSamples; i++) {
      floatSamples[i] = view.getInt16(i * bytesPerSample, true) / 32768.0;
    }
    decode(floatSamples);
  }
});