// test-online-paraformer-microphone-mic.js
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
const mic = require('mic'); // Audio capture via the mic package; see https://www.npmjs.com/package/mic before running.
const sherpa_onnx = require('sherpa-onnx');
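
// Prerequisites (a sketch of the expected setup; adjust paths/devices as needed):
//   - npm install mic sherpa-onnx
//   - Download and unpack the streaming paraformer model so that
//     ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/ exists; see the
//     sherpa-onnx documentation for download links.
//   - The mic package wraps arecord (Linux) / sox (macOS, Windows), so one of
//     those must be installed and on PATH.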

function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    enableEndpoint: 1,
    // Rule 1: an endpoint is detected after >= 2.4 s of trailing silence,
    // even if nothing has been decoded.
    rule1MinTrailingSilence: 2.4,
    // Rule 2: an endpoint is detected after >= 1.2 s of trailing silence
    // once something has been decoded.
    rule2MinTrailingSilence: 1.2,
    // Rule 3: an endpoint is detected once the utterance reaches 20 s.
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

/**
 * SpeechSession manages a recognition session: it accumulates finalized
 * sentences plus the text currently being recognized, and formatOutput
 * renders it. Sample output:
=== Automated Speech Recognition ===
Current Session #1
Time: 8:44:46 PM
------------------------
Recognized Sentences:
[8:44:43 PM] 1. it's so great three result is great great 她还支持中文
[8:44:46 PM] 2. 很厉
------------------------
Recognizing: 真的很厉害太厉害

*/
class SpeechSession {
  constructor() {
    this.startTime = Date.now();
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = Date.now();
  }

  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  finalizeSentence() {
    if (this.currentText.trim()) {
      this.sentences.push({
        text: this.currentText.trim(),
        timestamp: new Date().toLocaleTimeString()
      });
    }
    this.currentText = '';
  }

  shouldStartNewSession() {
    return Date.now() - this.lastUpdateTime > 10000; // 10 seconds of silence
  }
}

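// Redraw the whole console view: session header, finalized sentences, then
// the line currently being recognized.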
function formatOutput() {
  clearConsole();
  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log('------------------------');

  // Show the finalized sentences of the current session.
  if (currentSession.sentences.length > 0) {
    console.log('Recognized Sentences:');
    currentSession.sentences.forEach((sentence, index) => {
      console.log(`[${sentence.timestamp}] ${index + 1}. ${sentence.text}`);
    });
    console.log('------------------------');
  }

  // Show the text currently being recognized.
  if (currentSession.currentText) {
    console.log('Recognizing:', currentSession.currentText);
  }
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let currentSession = new SpeechSession();
let sessionCount = 1;

function clearConsole() {
  // ANSI escape sequences: clear the screen (ESC[2J) and move the cursor to
  // the top-left corner (ESC[0f).
  process.stdout.write('\x1B[2J\x1B[0f');
}


function exitHandler(options, exitCode) {
  if (options.cleanup) {
    // Runs on the 'exit' event: stop the microphone and free the recognizer's
    // native resources.
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }
  if (exitCode || exitCode === 0) console.log('Exit code:', exitCode);
  if (options.exit) process.exit();
}

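// Capture raw 16-bit signed mono PCM at the sample rate the recognizer
// expects (16 kHz for this model).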
const micInstance = mic({
  rate: recognizer.config.featConfig.sampleRate,
  channels: 1,
  debug: false, // disable the mic package's debug output
  device: 'default',
  bitwidth: 16,
  encoding: 'signed-integer',
  exitOnSilence: 6,
  fileType: 'raw'
});

const micInputStream = micInstance.getAudioStream();

function startMic() {
  return new Promise((resolve, reject) => {
    micInputStream.once('startComplete', () => {
      console.log('Microphone started.');
      resolve();
    });
    
    micInputStream.once('error', (err) => {
      console.error('Microphone start error:', err);
      reject(err);
    });
    
    micInstance.start();
  });
}

micInputStream.on('data', buffer => {
  // Reinterpret the Buffer as 16-bit samples. A Buffer may be a view into a
  // larger pooled ArrayBuffer, so honor its byteOffset and length instead of
  // wrapping buffer.buffer wholesale.
  const int16Array =
      new Int16Array(buffer.buffer, buffer.byteOffset, buffer.length >> 1);

  // Normalize 16-bit PCM ([-32768, 32767]) to floats in [-1, 1).
  const samples = new Float32Array(int16Array.length);
  for (let i = 0; i < int16Array.length; i++) {
    samples[i] = int16Array[i] / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  // Decode for as long as enough feature frames are available.
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // Start a new session after a long pause (over 10 s since the last update).
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }
    
    currentSession.addOrUpdateText(text);
    formatOutput();
  }

  // At an endpoint, commit the current sentence and reset the stream for the
  // next utterance.
  if (isEndpoint) {
    if (text.length > 0) {
      currentSession.finalizeSentence();
      formatOutput();
    }
    recognizer.reset(stream);
  }
});

micInputStream.on('error', err => {
  console.error('Audio stream error:', err);
});

micInputStream.on('close', () => {
  console.log('Microphone closed.');
});

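// Free resources on normal exit and on common termination signals.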
process.on('exit', exitHandler.bind(null, {cleanup: true}));
process.on('SIGINT', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR1', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR2', exitHandler.bind(null, {exit: true}));
process.on('uncaughtException', exitHandler.bind(null, {exit: true}));

async function main() {
  try {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  } catch (err) {
    console.error('Failed to initialize:', err);
    process.exit(1);
  }
}

main();