Committed by
GitHub
feat: add mic example for better compatibility (#1909)
Co-authored-by: wanghsinche <wanghsinche>
正在显示
3 个修改的文件
包含
229 行增加
和
1 行删除
| 1 | # Introduction | 1 | # Introduction |
| 2 | 2 | ||
| 3 | -Note: You need `Node >= 18`. | 3 | +Note: You need `Node >= 18`. |
| 4 | + | ||
| 5 | +Note: On Mac M1 and other Apple silicon machines, use the example `test-online-paraformer-microphone-mic.js`. | ||
| 4 | 6 | ||
| 5 | This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). | 7 | This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). |
| 6 | 8 | ||
| @@ -278,6 +280,25 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | @@ -278,6 +280,25 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 278 | node ./test-online-paraformer-microphone.js | 280 | node ./test-online-paraformer-microphone.js |
| 279 | ``` | 281 | ``` |
| 280 | 282 | ||
| 283 | + | ||
| 284 | +## ./test-online-paraformer-microphone-mic.js | ||
| 285 | + | ||
| 286 | +[./test-online-paraformer-microphone-mic.js](./test-online-paraformer-microphone-mic.js) | ||
| 287 | +demonstrates how to do real-time speech recognition from microphone | ||
| 288 | +with a streaming Paraformer model. In the code we use | ||
| 289 | +[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english). | ||
| 290 | + | ||
| 291 | +It uses `mic` for better compatibility; check its [npm page](https://www.npmjs.com/package/mic) before running it. | ||
| 292 | + | ||
| 293 | +You can use the following command to run it: | ||
| 294 | + | ||
| 295 | +```bash | ||
| 296 | +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 297 | +tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 && rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 298 | +node ./test-online-paraformer-microphone-mic.js | ||
| 299 | +``` | ||
| 300 | + | ||
| 301 | + | ||
| 281 | ## ./test-online-paraformer.js | 302 | ## ./test-online-paraformer.js |
| 282 | [./test-online-paraformer.js](./test-online-paraformer.js) demonstrates | 303 | [./test-online-paraformer.js](./test-online-paraformer.js) demonstrates |
| 283 | how to decode a file using a streaming Paraformer model. In the code we use | 304 | how to decode a file using a streaming Paraformer model. In the code we use |
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +const mic = require('mic'); // It uses `mic` for better compatibility, do check its [npm](https://www.npmjs.com/package/mic) before running it. | ||
| 3 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 4 | + | ||
/**
 * Create the streaming recognizer used by this example.
 *
 * The configuration points at the int8 encoder/decoder and the token table
 * of the bilingual zh-en streaming Paraformer model and turns on endpoint
 * detection with the three rule thresholds listed below.
 *
 * @returns {object} Whatever `sherpa_onnx.createOnlineRecognizer` returns
 *     for this configuration.
 */
function createOnlineRecognizer() {
  const recognizerConfig = {
    modelConfig: {
      paraformer: {
        encoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
        decoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
      },
      tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
    },
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
| 26 | + | ||
/**
 * Holds the text recognized during one listening session: the sentences
 * already finalized plus the partial text still being recognized.
 * `formatOutput` renders this state to the terminal.
 *
 * Sample output:
 *
 *   === Automated Speech Recognition ===
 *   Current Session #1
 *   Time: 8:44:46 PM
 *   ------------------------
 *   Recognized Sentences:
 *   [8:44:43 PM] 1. it's so great three result is great great 她还支持中文
 *   [8:44:46 PM] 2. 很厉
 *   ------------------------
 *   Recognizing: 真的很厉害太厉害
 */
class SpeechSession {
  /**
   * @param {number} [silenceTimeoutMs=10000] How long, in milliseconds, the
   *     session may go without a text update before
   *     `shouldStartNewSession()` reports true.
   */
  constructor(silenceTimeoutMs = 10000) {
    const now = Date.now();
    this.startTime = now;
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = now;
    this.silenceTimeoutMs = silenceTimeoutMs;
  }

  /**
   * Replace the in-progress text and record the time of the update.
   *
   * @param {string} text Latest partial recognition result.
   */
  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  /**
   * Move the in-progress text into `sentences`, stamped with the current
   * local time, then clear it. Whitespace-only text is dropped.
   */
  finalizeSentence() {
    const text = this.currentText.trim();
    if (text) {
      this.sentences.push({
        text,
        timestamp: new Date().toLocaleTimeString(),
      });
    }
    this.currentText = '';
  }

  /**
   * @returns {boolean} True when more than `silenceTimeoutMs` milliseconds
   *     have passed since the last text update.
   */
  shouldStartNewSession() {
    return Date.now() - this.lastUpdateTime > this.silenceTimeoutMs;
  }
}
| 68 | + | ||
/**
 * Redraw the terminal view from the module-level `sessionCount` and
 * `currentSession`: a header with the session number and current time, the
 * numbered list of finalized sentences (if any), and the text that is still
 * being recognized (if any).
 */
function formatOutput() {
  const divider = '------------------------';

  clearConsole();
  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log(divider);

  const { sentences, currentText } = currentSession;

  // Sentences finalized earlier in this session, numbered from 1.
  if (sentences.length > 0) {
    console.log('Recognized Sentences:');
    for (const [position, { timestamp, text }] of sentences.entries()) {
      console.log(`[${timestamp}] ${position + 1}. ${text}`);
    }
    console.log(divider);
  }

  // Text that is currently being recognized.
  if (currentText) {
    console.log('Recognizing:', currentText);
  }
}
| 90 | + | ||
| 91 | + | ||
// Module-level state shared by the audio handler, the renderer and the exit
// handler: one recognizer, one decoding stream created from it, the running
// session number, and the session currently being filled.
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let sessionCount = 1;
let currentSession = new SpeechSession();
| 96 | + | ||
/**
 * Clear the terminal by writing two ANSI escape sequences to stdout:
 * `ESC[2J` (erase the display) followed by `ESC[0f` (move the cursor to the
 * top-left corner).
 */
function clearConsole() {
  const clearSequence = '\x1B[2J\x1B[0f';
  process.stdout.write(clearSequence);
}
| 100 | + | ||
| 101 | + | ||
/**
 * Shared handler for the process 'exit', signal and 'uncaughtException'
 * events. It is registered via `bind(null, options)`, so the event's own
 * arguments arrive after `options`.
 *
 * @param {{cleanup?: boolean, exit?: boolean}} options
 *     `cleanup`: stop the microphone and free the stream and recognizer.
 *     `exit`: terminate the process after logging.
 * @param {number|string|Error} [exitCode] First event argument: the exit
 *     code for 'exit', the signal name for signal events, or the thrown
 *     value for 'uncaughtException'.
 * @param {string|number} [origin] Second event argument, if any. For
 *     'uncaughtException' Node passes the origin string.
 */
function exitHandler(options, exitCode, origin) {
  if (options.cleanup) {
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }

  // An uncaught exception must not be reported as an "exit code", and the
  // process must not exit with status 0 afterwards.
  const crashed =
    exitCode instanceof Error ||
    origin === 'uncaughtException' ||
    origin === 'unhandledRejection';

  if (crashed) {
    console.error('Uncaught exception:', exitCode);
  } else if (typeof exitCode === 'string') {
    // Signal events pass the signal name, not a numeric code.
    console.log('Received signal:', exitCode);
  } else if (exitCode || exitCode === 0) {
    console.log('Exit code:', exitCode);
  }

  if (options.exit) {
    if (crashed) {
      process.exit(1);
    } else {
      process.exit();
    }
  }
}
| 112 | + | ||
// Capture settings handed to `mic`: mono, 16-bit signed raw samples at the
// sample rate the recognizer's feature config reports.
const micOptions = {
  rate: recognizer.config.featConfig.sampleRate,
  channels: 1,
  debug: false, // keep debug output off
  device: 'default',
  bitwidth: 16,
  encoding: 'signed-integer',
  exitOnSilence: 6,
  fileType: 'raw',
};

const micInstance = mic(micOptions);

// Stream of captured audio; the listeners are attached further down.
const micInputStream = micInstance.getAudioStream();
| 125 | + | ||
/**
 * Start the microphone and wait until its audio stream reports that
 * capture has begun.
 *
 * @returns {Promise<void>} Resolves on the stream's 'startComplete' event;
 *     rejects with the error if the stream emits 'error' first.
 */
function startMic() {
  return new Promise((resolve, reject) => {
    // Whichever event fires first removes the other one-shot listener.
    // Otherwise a runtime error long after a successful start would still
    // be logged as a start error.
    const onStarted = () => {
      micInputStream.removeListener('error', onStartError);
      console.log('Mic phone started.');
      resolve();
    };

    const onStartError = (err) => {
      micInputStream.removeListener('startComplete', onStarted);
      console.error('Mic phone start error:', err);
      reject(err);
    };

    micInputStream.once('startComplete', onStarted);
    micInputStream.once('error', onStartError);

    micInstance.start();
  });
}
| 141 | + | ||
// Feed each captured audio chunk to the recognizer and refresh the display.
micInputStream.on('data', (buffer) => {
  // Decode 16-bit PCM straight from the Buffer. `buffer.buffer` is the whole
  // backing ArrayBuffer, which can be larger than, or offset from, this
  // chunk, so viewing it as an Int16Array may read unrelated bytes or throw
  // on an odd byte count. A trailing odd byte, if any, is ignored.
  // NOTE(review): assumes little-endian samples, presumably mic's default
  // since no `endian` option is set - confirm.
  const sampleCount = Math.floor(buffer.length / 2);
  const samples = new Float32Array(sampleCount);

  for (let i = 0; i < sampleCount; i++) {
    // Scale the int16 range to [-1, 1).
    samples[i] = buffer.readInt16LE(i * 2) / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // Start a new session after a long gap without recognized text.
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }

    currentSession.addOrUpdateText(text);

    // At an endpoint the partial text becomes a finished sentence.
    if (isEndpoint) {
      currentSession.finalizeSentence();
    }

    // Redraw once per chunk, after all state changes.
    formatOutput();
  }

  if (isEndpoint) {
    recognizer.reset(stream);
  }
});
| 179 | + | ||
// Report errors emitted by the audio stream.
const handleStreamError = (err) => {
  console.error('Audio stream error:', err);
};

// Report when the audio stream has been closed.
const handleStreamClose = () => {
  console.log('Mic phone closed.');
};

micInputStream.on('error', handleStreamError);
micInputStream.on('close', handleStreamClose);
| 187 | + | ||
// Release resources when the process exits.
process.on('exit', exitHandler.bind(null, { cleanup: true }));

// These events only request termination; the 'exit' hook above then does
// the cleanup.
for (const eventName of ['SIGINT', 'SIGUSR1', 'SIGUSR2', 'uncaughtException']) {
  process.on(eventName, exitHandler.bind(null, { exit: true }));
}
| 193 | + | ||
/**
 * Entry point: start the microphone, then draw the initial (empty) view.
 * Any failure during start-up is logged and ends the process with status 1.
 */
async function main() {
  const initialize = async () => {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  };

  await initialize().catch((err) => {
    console.error('Failed to initialize:', err);
    process.exit(1);
  });
}

main();
-
请 注册 或 登录 后发表评论