// test_spoken_language_identification.js
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx-node');
// Please download whisper multi-lingual models from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// Creates a SpokenLanguageIdentification instance that runs the
// int8-quantized multi-lingual Whisper "tiny" model on the CPU.
// The model files are expected in ./sherpa-onnx-whisper-tiny/.
function createSpokenLanguageID() {
  return new sherpa_onnx.SpokenLanguageIdentification({
    whisper: {
      encoder: './sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx',
      decoder: './sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx',
    },
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  });
}
// Identify the spoken language of each test wave file and print
// one line per file: <file name> <language code> <English language name>.
const slid = createSpokenLanguageID();
const testWaves = [
  './spoken-language-identification-test-wavs/ar-arabic.wav',
  './spoken-language-identification-test-wavs/de-german.wav',
  './spoken-language-identification-test-wavs/en-english.wav',
  './spoken-language-identification-test-wavs/fr-french.wav',
  './spoken-language-identification-test-wavs/pt-portuguese.wav',
  './spoken-language-identification-test-wavs/es-spanish.wav',
  './spoken-language-identification-test-wavs/zh-chinese.wav',
];
// Maps a language code (e.g. 'de') to its English name (e.g. 'German').
const display = new Intl.DisplayNames(['en'], {type: 'language'});
for (const f of testWaves) {
  const stream = slid.createStream();
  const wave = sherpa_onnx.readWave(f);
  stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  const lang = slid.compute(stream);
  // Given the fixed './dir/file.wav' paths above, split('/')[2] is the
  // bare file name.
  console.log(f.split('/')[2], lang, display.of(lang));
}