wanghsinche

feat: add mic example for better compatibility (#1909)

Co-authored-by: wanghsinche <wanghsinche>
README.md
@@ -2,6 +2,8 @@
Note: You need `Node >= 18`.
Note: For Mac M1 and other Apple silicon chips, check the example `test-online-paraformer-microphone-mic.js`
This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
It uses WebAssembly to wrap `sherpa-onnx` for NodeJS, and it does not support multiple threads.
@@ -278,6 +280,25 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer-microphone.js
```
## ./test-online-paraformer-microphone-mic.js
[./test-online-paraformer-microphone-mic.js](./test-online-paraformer-microphone-mic.js)
demonstrates how to do real-time speech recognition from a microphone
with a streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).
It uses [`mic`](https://www.npmjs.com/package/mic) for better compatibility; check its npm page before running it.
You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer-microphone-mic.js
```
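Note that `mic` records by spawning `arecord` on Linux and `sox` on macOS/Windows, so one of those tools must be installed. Install the npm dependencies first (a minimal sketch, assuming you run the commands from this directory, whose `package.json` lists `mic`):

```bash
npm install

# Optional sanity check that a recording backend is available
# (arecord on Linux, sox on macOS/Windows):
arecord --version || sox --version
```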
## ./test-online-paraformer.js
[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
how to decode a file using a streaming Paraformer model. In the code we use

package.json
{
  "dependencies": {
    "mic": "^2.1.2",
    "naudiodon2": "^2.4.0",
    "sherpa-onnx": "^1.10.45",
    "wav": "^1.0.2"
  }
}

test-online-paraformer-microphone-mic.js
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
const mic = require('mic');  // used for better microphone compatibility; see https://www.npmjs.com/package/mic
const sherpa_onnx = require('sherpa-onnx');
function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };
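  // Endpoint detection: an endpoint is reported as soon as any rule fires.
  // rule1: >= 2.4 s of trailing silence, even if nothing has been decoded yet.
  // rule2: >= 1.2 s of trailing silence after something has been decoded.
  // rule3: the utterance is longer than 20 s.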
  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
/**
 * SpeechSession class: works as a session manager together with the
 * formatOutput function.
 *
 * Sample output:
 *
 *   === Automated Speech Recognition ===
 *   Current Session #1
 *   Time: 8:44:46 PM
 *   ------------------------
 *   Recognized Sentences:
 *   [8:44:43 PM] 1. it's so great three result is great great 她还支持中文
 *   [8:44:46 PM] 2. 很厉
 *   ------------------------
 *   Recognizing: 真的很厉害太厉害
 */
class SpeechSession {
  constructor() {
    this.startTime = Date.now();
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = Date.now();
  }

  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  finalizeSentence() {
    if (this.currentText.trim()) {
      this.sentences.push({
        text: this.currentText.trim(),
        timestamp: new Date().toLocaleTimeString()
      });
    }
    this.currentText = '';
  }

  shouldStartNewSession() {
    return Date.now() - this.lastUpdateTime > 10000;  // 10 seconds of silence
  }
}
function formatOutput() {
  clearConsole();
  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log('------------------------');

  // Show the previously recognized sentences
  if (currentSession.sentences.length > 0) {
    console.log('Recognized Sentences:');
    currentSession.sentences.forEach((sentence, index) => {
      console.log(`[${sentence.timestamp}] ${index + 1}. ${sentence.text}`);
    });
    console.log('------------------------');
  }

  // Show the text that is currently being recognized
  if (currentSession.currentText) {
    console.log('Recognizing:', currentSession.currentText);
  }
}
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let currentSession = new SpeechSession();
let sessionCount = 1;
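// Clear the terminal with ANSI escape codes: ESC[2J erases the screen and
// ESC[0f moves the cursor back to the top-left corner.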
function clearConsole() {
  process.stdout.write('\x1B[2J\x1B[0f');
}
function exitHandler(options, exitCode) {
  if (options.cleanup) {
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }
  if (exitCode || exitCode === 0) console.log('Exit code:', exitCode);
  if (options.exit) process.exit();
}
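// Record raw 16-bit signed PCM, mono, at the sample rate the recognizer
// expects. `mic` spawns arecord (Linux) or sox (macOS/Windows) under the hood.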
const micInstance = mic({
  rate: recognizer.config.featConfig.sampleRate,
  channels: 1,
  debug: false,  // disable debug output
  device: 'default',
  bitwidth: 16,
  encoding: 'signed-integer',
  exitOnSilence: 6,
  fileType: 'raw'
});
const micInputStream = micInstance.getAudioStream();
function startMic() {
  return new Promise((resolve, reject) => {
    micInputStream.once('startComplete', () => {
      console.log('Microphone started.');
      resolve();
    });
    micInputStream.once('error', (err) => {
      console.error('Microphone start error:', err);
      reject(err);
    });
    micInstance.start();
  });
}
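// Each 'data' chunk is raw little-endian Int16 PCM. Convert it to Float32
// samples in [-1, 1) and feed it to the streaming recognizer, decoding as
// many frames as are ready before checking for an endpoint.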
micInputStream.on('data', buffer => {
  // Respect the Buffer's byteOffset/length: stream Buffers may be views into
  // a larger shared ArrayBuffer.
  const int16Array = new Int16Array(buffer.buffer, buffer.byteOffset, buffer.length / 2);
  const samples = new Float32Array(int16Array.length);
  for (let i = 0; i < int16Array.length; i++) {
    samples[i] = int16Array[i] / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // Check whether a new session should be started
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }
    currentSession.addOrUpdateText(text);
    formatOutput();
  }

  if (isEndpoint) {
    if (text.length > 0) {
      currentSession.finalizeSentence();
      formatOutput();
    }
    recognizer.reset(stream);
  }
});
micInputStream.on('error', err => {
  console.error('Audio stream error:', err);
});

micInputStream.on('close', () => {
  console.log('Microphone closed.');
});
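// Free the native resources (microphone, stream, recognizer) on every exit
// path, including Ctrl-C and uncaught exceptions.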
process.on('exit', exitHandler.bind(null, {cleanup: true}));
process.on('SIGINT', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR1', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR2', exitHandler.bind(null, {exit: true}));
process.on('uncaughtException', exitHandler.bind(null, {exit: true}));
async function main() {
  try {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  } catch (err) {
    console.error('Failed to initialize:', err);
    process.exit(1);
  }
}

main();
\ No newline at end of file