// test_offline_speaker_diarization.js (2.1 KB)
// Copyright (c)  2024  Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// clang-format off
/* Please use the following commands to download files
   used in this script

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

 */
// clang-format on

// Settings handed to the OfflineSpeakerDiarization constructor below.
const config = {
  // Speaker segmentation model (pyannote) from the extracted tarball.
  segmentation: {
    pyannote: {model: './sherpa-onnx-pyannote-segmentation-3-0/model.onnx'},
  },

  // Speaker embedding model.
  embedding: {model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx'},

  clustering: {
    // The test recording ./0-four-speakers-zh.wav is known to contain
    // exactly 4 speakers, so the cluster count is fixed at 4.
    // Set numClusters to -1 when the number of speakers is unknown.
    numClusters: 4,

    // Only consulted when numClusters is -1; otherwise it is ignored.
    //
    // Raising the threshold yields fewer clusters (fewer speakers);
    // lowering it yields more clusters (more speakers).
    // This value has to be tuned for your own data.
    threshold: 0.5,
  },

  // Segments shorter than this many seconds are dropped.
  minDurationOn: 0.2,

  // Two segments separated by a gap shorter than this many seconds
  // are merged into a single segment.
  minDurationOff: 0.5,
};

// Path of the test recording to diarize.
const waveFilename = './0-four-speakers-zh.wav';

// Build the diarizer from the config defined above.
const sd = new sherpa_onnx.OfflineSpeakerDiarization(config);
console.log('Started');

// Load the recording and refuse to continue if its sample rate differs from
// the one the diarizer reports. Strict comparison is used so that a
// type-coerced match (e.g. '16000' vs 16000) is not silently accepted.
const wave = sherpa_onnx.readWave(waveFilename);
if (sd.sampleRate !== wave.sampleRate) {
  throw new Error(
      `Expected sample rate: ${sd.sampleRate}, given: ${wave.sampleRate}`);
}

// Run diarization on the loaded samples and print whatever it returns.
const segments = sd.process(wave.samples);
console.log(segments);