test-offline-transducer.js
3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createOfflineRecognizer() {
let featConfig = {
sampleRate: 16000,
featureDim: 80,
};
let modelConfig = {
transducer: {
encoder:
'./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.int8.onnx',
decoder:
'./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx',
joiner:
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
},
paraformer: {
model: '',
},
nemoCtc: {
model: '',
},
whisper: {
encoder: '',
decoder: '',
language: '',
task: '',
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'transducer',
};
let lmConfig = {
model: '',
scale: 1.0,
};
let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
};
return sherpa_onnx.createOfflineRecognizer(config);
}
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
const reader = new wav.Reader();
const readable = new Readable().wrap(reader);
const buf = [];
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
if (sampleRate != recognizer.config.featConfig.sampleRate) {
throw new Error(`Only support sampleRate ${
recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
}
if (audioFormat != 1) {
throw new Error(`Only support PCM format. Given ${audioFormat}`);
}
if (channels != 1) {
throw new Error(`Only a single channel. Given ${channel}`);
}
if (bitDepth != 16) {
throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
}
});
fs.createReadStream(waveFilename, {'highWaterMark': 4096})
.pipe(reader)
.on('finish', function(err) {
// tail padding
const floatSamples =
new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
buf.push(floatSamples);
const flattened =
Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);
stream.free();
recognizer.free();
});
readable.on('readable', function() {
let chunk;
while ((chunk = readable.read()) != null) {
const int16Samples = new Int16Array(
chunk.buffer, chunk.byteOffset,
chunk.length / Int16Array.BYTES_PER_ELEMENT);
const floatSamples = new Float32Array(int16Samples.length);
for (let i = 0; i < floatSamples.length; i++) {
floatSamples[i] = int16Samples[i] / 32768.0;
}
buf.push(floatSamples);
}
});