Fangjun Kuang
Committed by GitHub

Add speaker identification APIs for node-addon-api (#874)

@@ -18,7 +18,7 @@ fi @@ -18,7 +18,7 @@ fi
18 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) 18 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
19 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" 19 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
20 20
21 -# SHERPA_ONNX_VERSION=1.0.20 21 +# SHERPA_ONNX_VERSION=1.0.21
22 22
23 if [ -z $owner ]; then 23 if [ -z $owner ]; then
24 owner=k2-fsa 24 owner=k2-fsa
@@ -6,6 +6,16 @@ d=nodejs-addon-examples @@ -6,6 +6,16 @@ d=nodejs-addon-examples
6 echo "dir: $d" 6 echo "dir: $d"
7 cd $d 7 cd $d
8 8
  9 +echo "----------speaker identification----------"
  10 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  11 +
  12 +git clone https://github.com/csukuangfj/sr-data
  13 +
  14 +node ./test_speaker_identification.js
  15 +
  16 +rm *.onnx
  17 +rm -rf sr-data
  18 +
9 echo "----------spoken language identification----------" 19 echo "----------spoken language identification----------"
10 20
11 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 21 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
1 name: npm-addon-linux-aarch64 1 name: npm-addon-linux-aarch64
2 2
3 on: 3 on:
  4 + push:
  5 + branches:
  6 + - node-addon
4 workflow_dispatch: 7 workflow_dispatch:
5 8
6 concurrency: 9 concurrency:
1 name: npm-addon-linux-x64 1 name: npm-addon-linux-x64
2 2
3 on: 3 on:
  4 + push:
  5 + branches:
  6 + - node-addon
4 workflow_dispatch: 7 workflow_dispatch:
5 8
6 concurrency: 9 concurrency:
1 name: npm-addon-macos 1 name: npm-addon-macos
2 2
3 on: 3 on:
  4 + push:
  5 + branches:
  6 + - node-addon
4 workflow_dispatch: 7 workflow_dispatch:
5 8
6 concurrency: 9 concurrency:
1 name: npm-addon-win-x64 1 name: npm-addon-win-x64
2 2
3 on: 3 on:
  4 + push:
  5 + branches:
  6 + - node-addon
4 workflow_dispatch: 7 workflow_dispatch:
5 8
6 concurrency: 9 concurrency:
1 name: npm-addon 1 name: npm-addon
2 2
3 on: 3 on:
  4 + push:
  5 + branches:
  6 + - node-addon
4 workflow_dispatch: 7 workflow_dispatch:
5 8
6 concurrency: 9 concurrency:
@@ -52,7 +55,7 @@ jobs: @@ -52,7 +55,7 @@ jobs:
52 55
53 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) 56 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
54 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" 57 echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
55 - # SHERPA_ONNX_VERSION=1.0.20 58 + # SHERPA_ONNX_VERSION=1.0.21
56 59
57 src_dir=.github/scripts/node-addon 60 src_dir=.github/scripts/node-addon
58 sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json 61 sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
@@ -201,3 +201,16 @@ node ./test_spoken_language_identification.js @@ -201,3 +201,16 @@ node ./test_spoken_language_identification.js
201 npm install naudiodon2 201 npm install naudiodon2
202 node ./test_vad_spoken_language_identification_microphone.js 202 node ./test_vad_spoken_language_identification_microphone.js
203 ``` 203 ```
  204 +
  205 +## Speaker identification
  206 +
  207 +You can find more models at
  208 +<https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models>
  209 +
  210 +```bash
  211 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  212 +
  213 +git clone https://github.com/csukuangfj/sr-data
  214 +
  215 +node ./test_speaker_identification.js
  216 +```
@@ -2,7 +2,6 @@ @@ -2,7 +2,6 @@
2 const sherpa_onnx = require('sherpa-onnx-node'); 2 const sherpa_onnx = require('sherpa-onnx-node');
3 const performance = require('perf_hooks').performance; 3 const performance = require('perf_hooks').performance;
4 4
5 -  
6 // Please download test files from 5 // Please download test files from
7 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models 6 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
8 const config = { 7 const config = {
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +const sherpa_onnx = require('sherpa-onnx-node');
  3 +const assert = require('node:assert');
  4 +
/**
 * Builds the speaker embedding extractor used by this example.
 *
 * The model file is looked up in the current directory. Model files can be
 * downloaded from
 * https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
 *
 * @returns a new sherpa_onnx.SpeakerEmbeddingExtractor
 */
function createSpeakerEmbeddingExtractor() {
  const modelPath =
      './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';

  return new sherpa_onnx.SpeakerEmbeddingExtractor({
    model: modelPath,
    numThreads: 1,
    debug: true,
  });
}
  15 +
/**
 * Computes the embedding of the audio stored in a single wave file.
 *
 * @param extractor  an extractor offering createStream() and compute()
 * @param filename   path of the wave file to read
 * @returns the value returned by extractor.compute() for the filled stream
 */
function computeEmbedding(extractor, filename) {
  const stream = extractor.createStream();

  // Only the sample rate and the samples of the wave are forwarded.
  const {sampleRate, samples} = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate, samples});

  return extractor.compute(stream);
}
  22 +
// Example flow: enroll two speakers, identify the speaker of every test file,
// then exercise verify() and remove().
const extractor = createSpeakerEmbeddingExtractor();
const manager = new sherpa_onnx.SpeakerEmbeddingManager(extractor.dim);

// Please download test files from
// https://github.com/csukuangfj/sr-data
const spk1Files = [
  './sr-data/enroll/fangjun-sr-1.wav',
  './sr-data/enroll/fangjun-sr-2.wav',
  './sr-data/enroll/fangjun-sr-3.wav',
];

// One embedding per enrollment file.
const spk1Vec = spk1Files.map((f) => computeEmbedding(extractor, f));

const spk2Files = [
  './sr-data/enroll/leijun-sr-1.wav',
  './sr-data/enroll/leijun-sr-2.wav',
];

const spk2Vec = spk2Files.map((f) => computeEmbedding(extractor, f));

// `ok` is reused for the result of each manager call checked below.
let ok = manager.addMulti({name: 'fangjun', v: spk1Vec});
assert.equal(ok, true);

ok = manager.addMulti({name: 'leijun', v: spk2Vec});
assert.equal(ok, true);

assert.equal(manager.getNumSpeakers(), 2);

assert.equal(manager.contains('fangjun'), true);
assert.equal(manager.contains('leijun'), true);

console.log('---All speakers---');

console.log(manager.getAllSpeakerNames());
console.log('------------');

const testFiles = [
  './sr-data/test/fangjun-test-sr-1.wav',
  './sr-data/test/leijun-test-sr-1.wav',
  './sr-data/test/liudehua-test-sr-1.wav',
];

const threshold = 0.6;

for (const f of testFiles) {
  const embedding = computeEmbedding(extractor, f);

  // search() yields an empty string when no enrolled speaker matches.
  let name = manager.search({v: embedding, threshold});
  if (name === '') {
    name = '<Unknown>';
  }
  console.log(`${f}: ${name}`);
}

// The first test file belongs to 'fangjun', so verification must succeed.
ok = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold,
});

assert.equal(ok, true);

ok = manager.remove('fangjun');
assert.equal(ok, true);

// After removal the same verification must fail.
ok = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold,
});
assert.equal(ok, false);

assert.equal(manager.getNumSpeakers(), 1);
@@ -21,6 +21,7 @@ set(srcs @@ -21,6 +21,7 @@ set(srcs
21 src/non-streaming-asr.cc 21 src/non-streaming-asr.cc
22 src/non-streaming-tts.cc 22 src/non-streaming-tts.cc
23 src/sherpa-onnx-node-addon-api.cc 23 src/sherpa-onnx-node-addon-api.cc
  24 + src/speaker-identification.cc
24 src/spoken-language-identification.cc 25 src/spoken-language-identification.cc
25 src/streaming-asr.cc 26 src/streaming-asr.cc
26 src/vad.cc 27 src/vad.cc
@@ -4,6 +4,7 @@ const non_streaming_asr = require('./non-streaming-asr.js'); @@ -4,6 +4,7 @@ const non_streaming_asr = require('./non-streaming-asr.js');
4 const non_streaming_tts = require('./non-streaming-tts.js'); 4 const non_streaming_tts = require('./non-streaming-tts.js');
5 const vad = require('./vad.js'); 5 const vad = require('./vad.js');
6 const slid = require('./spoken-language-identification.js'); 6 const slid = require('./spoken-language-identification.js');
  7 +const sid = require('./speaker-identification.js');
7 8
8 module.exports = { 9 module.exports = {
9 OnlineRecognizer: streaming_asr.OnlineRecognizer, 10 OnlineRecognizer: streaming_asr.OnlineRecognizer,
@@ -15,4 +16,6 @@ module.exports = { @@ -15,4 +16,6 @@ module.exports = {
15 Vad: vad.Vad, 16 Vad: vad.Vad,
16 CircularBuffer: vad.CircularBuffer, 17 CircularBuffer: vad.CircularBuffer,
17 SpokenLanguageIdentification: slid.SpokenLanguageIdentification, 18 SpokenLanguageIdentification: slid.SpokenLanguageIdentification,
  19 + SpeakerEmbeddingExtractor: sid.SpeakerEmbeddingExtractor,
  20 + SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
18 } 21 }
  1 +const addon = require('./addon.js');
  2 +const streaming_asr = require('./streaming-asr.js');
  3 +
/**
 * JavaScript wrapper around the native speaker embedding extractor.
 *
 * Fields:
 *   handle - value returned by addon.createSpeakerEmbeddingExtractor()
 *   config - the config object given to the constructor
 *   dim    - value reported by addon.speakerEmbeddingExtractorDim()
 */
class SpeakerEmbeddingExtractor {
  constructor(config) {
    const handle = addon.createSpeakerEmbeddingExtractor(config);

    this.handle = handle;
    this.config = config;
    this.dim = addon.speakerEmbeddingExtractorDim(handle);
  }

  // Returns a new OnlineStream bound to this extractor.
  createStream() {
    const {OnlineStream} = streaming_asr;
    const streamHandle =
        addon.speakerEmbeddingExtractorCreateStream(this.handle);

    return new OnlineStream(streamHandle);
  }

  // Forwards to the addon's readiness check for the given stream.
  isReady(stream) {
    const streamHandle = stream.handle;
    return addon.speakerEmbeddingExtractorIsReady(this.handle, streamHandle);
  }

  // Returns the embedding of the given stream as a float32 array.
  compute(stream) {
    const streamHandle = stream.handle;
    return addon.speakerEmbeddingExtractorComputeEmbedding(
        this.handle, streamHandle);
  }
}
  26 +
/**
 * Concatenates a list of numeric arrays into one Float32Array.
 *
 * @param arrayList  array-like of arrays / typed arrays of numbers
 * @returns a Float32Array holding all entries back to back, in list order
 */
function flatten(arrayList) {
  let n = 0;
  for (let i = 0; i < arrayList.length; ++i) {
    n += arrayList[i].length;
  }
  const ans = new Float32Array(n);

  let offset = 0;
  for (let i = 0; i < arrayList.length; ++i) {
    ans.set(arrayList[i], offset);
    offset += arrayList[i].length;
  }
  return ans;
}

/**
 * Throws a RangeError when `v` has a numeric length different from `dim`.
 *
 * Values without a numeric length (missing field, wrong type) are deliberately
 * not rejected here so that the addon can report them with its own messages.
 */
function checkEmbeddingDim(v, dim, what) {
  if (v == null || typeof v.length !== 'number') {
    return;
  }

  if (v.length !== dim) {
    throw new RangeError(
        `${what} should have ${dim} elements. Given: ${v.length}`);
  }
}

/**
 * JavaScript wrapper around the native speaker embedding manager.
 *
 * The native wrappers hand only the data pointer of an embedding to the C API,
 * without its length. Every method that takes an embedding therefore checks
 * here that it holds exactly `dim` values before calling into the addon.
 */
class SpeakerEmbeddingManager {
  constructor(dim) {
    this.handle = addon.createSpeakerEmbeddingManager(dim);
    this.dim = dim;
  }

  /*
   * obj = {name: "xxx", v: a-float32-array}
   *
   * Returns the addon's result for the add operation.
   * Throws RangeError if obj.v does not have this.dim elements.
   */
  add(obj) {
    checkEmbeddingDim(obj && obj.v, this.dim, 'obj.v');
    return addon.speakerEmbeddingManagerAdd(this.handle, obj);
  }

  /*
   * obj =
   * {name: "xxx", v: [float32_array1, float32_array2, ..., float32_arrayn]}
   *
   * Returns the addon's result for the add operation.
   * Throws TypeError if obj.v is not a non-empty array and RangeError if any
   * entry does not have this.dim elements.
   */
  addMulti(obj) {
    const list = obj.v;
    if (!Array.isArray(list) || list.length === 0) {
      throw new TypeError('obj.v should be a non-empty array of embeddings');
    }

    // The flattened buffer is split natively into n chunks, so every entry
    // must have the manager's dimension for the chunks to line up.
    for (let i = 0; i < list.length; ++i) {
      const v = list[i];
      if (v == null || v.length !== this.dim) {
        throw new RangeError(
            `obj.v[${i}] should have ${this.dim} elements`);
      }
    }

    const c = {
      name: obj.name,
      vv: flatten(list),
      n: list.length,
    };
    return addon.speakerEmbeddingManagerAddListFlattened(this.handle, c);
  }

  // Removes the speaker with the given name; returns the addon's result.
  remove(name) {
    return addon.speakerEmbeddingManagerRemove(this.handle, name);
  }

  /*
   * obj = {v: a-float32-array, threshold: a-float }
   *
   * Returns the name found by the addon; the addon returns an empty string
   * when its search yields no name.
   * Throws RangeError if obj.v does not have this.dim elements.
   */
  search(obj) {
    checkEmbeddingDim(obj && obj.v, this.dim, 'obj.v');
    return addon.speakerEmbeddingManagerSearch(this.handle, obj);
  }

  /*
   * obj = {name: 'xxx', v: a-float32-array, threshold: a-float }
   *
   * Returns the addon's verification result.
   * Throws RangeError if obj.v does not have this.dim elements.
   */
  verify(obj) {
    checkEmbeddingDim(obj && obj.v, this.dim, 'obj.v');
    return addon.speakerEmbeddingManagerVerify(this.handle, obj);
  }

  // Returns the addon's answer to whether `name` is registered.
  contains(name) {
    return addon.speakerEmbeddingManagerContains(this.handle, name);
  }

  // Returns the number of speakers reported by the addon.
  getNumSpeakers() {
    return addon.speakerEmbeddingManagerNumSpeakers(this.handle);
  }

  // Returns the speaker names reported by the addon.
  getAllSpeakerNames() {
    return addon.speakerEmbeddingManagerGetAllSpeakers(this.handle);
  }
}
  98 +
  99 +module.exports = {
  100 + SpeakerEmbeddingExtractor,
  101 + SpeakerEmbeddingManager,
  102 +}
@@ -64,5 +64,6 @@ class OnlineRecognizer { @@ -64,5 +64,6 @@ class OnlineRecognizer {
64 64
65 module.exports = { 65 module.exports = {
66 OnlineRecognizer, 66 OnlineRecognizer,
  67 + OnlineStream,
67 Display 68 Display
68 } 69 }
@@ -17,6 +17,8 @@ void InitWaveWriter(Napi::Env env, Napi::Object exports); @@ -17,6 +17,8 @@ void InitWaveWriter(Napi::Env env, Napi::Object exports);
17 17
18 void InitSpokenLanguageID(Napi::Env env, Napi::Object exports); 18 void InitSpokenLanguageID(Napi::Env env, Napi::Object exports);
19 19
  20 +void InitSpeakerID(Napi::Env env, Napi::Object exports);
  21 +
20 Napi::Object Init(Napi::Env env, Napi::Object exports) { 22 Napi::Object Init(Napi::Env env, Napi::Object exports) {
21 InitStreamingAsr(env, exports); 23 InitStreamingAsr(env, exports);
22 InitNonStreamingAsr(env, exports); 24 InitNonStreamingAsr(env, exports);
@@ -25,6 +27,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { @@ -25,6 +27,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
25 InitWaveReader(env, exports); 27 InitWaveReader(env, exports);
26 InitWaveWriter(env, exports); 28 InitWaveWriter(env, exports);
27 InitSpokenLanguageID(env, exports); 29 InitSpokenLanguageID(env, exports);
  30 + InitSpeakerID(env, exports);
28 31
29 return exports; 32 return exports;
30 } 33 }
  1 +// scripts/node-addon-api/src/speaker-identification.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +#include <sstream>
  5 +
  6 +#include "macros.h" // NOLINT
  7 +#include "napi.h" // NOLINT
  8 +#include "sherpa-onnx/c-api/c-api.h"
  9 +
  10 +static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>
  11 +CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
  12 + Napi::Env env = info.Env();
  13 + if (info.Length() != 1) {
  14 + std::ostringstream os;
  15 + os << "Expect only 1 argument. Given: " << info.Length();
  16 +
  17 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  18 +
  19 + return {};
  20 + }
  21 +
  22 + if (!info[0].IsObject()) {
  23 + Napi::TypeError::New(env, "You should pass an object as the only argument.")
  24 + .ThrowAsJavaScriptException();
  25 +
  26 + return {};
  27 + }
  28 +
  29 + Napi::Object o = info[0].As<Napi::Object>();
  30 +
  31 + SherpaOnnxSpeakerEmbeddingExtractorConfig c;
  32 + memset(&c, 0, sizeof(c));
  33 +
  34 + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  35 + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
  36 +
  37 + if (o.Has("debug") &&
  38 + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
  39 + if (o.Get("debug").IsBoolean()) {
  40 + c.debug = o.Get("debug").As<Napi::Boolean>().Value();
  41 + } else {
  42 + c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
  43 + }
  44 + }
  45 +
  46 + SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
  47 +
  48 + const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
  49 + SherpaOnnxCreateSpeakerEmbeddingExtractor(&c);
  50 +
  51 + if (c.model) {
  52 + delete[] c.model;
  53 + }
  54 +
  55 + if (c.provider) {
  56 + delete[] c.provider;
  57 + }
  58 +
  59 + if (!extractor) {
  60 + Napi::TypeError::New(env, "Please check your config!")
  61 + .ThrowAsJavaScriptException();
  62 +
  63 + return {};
  64 + }
  65 +
  66 + return Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>::New(
  67 + env, const_cast<SherpaOnnxSpeakerEmbeddingExtractor *>(extractor),
  68 + [](Napi::Env env, SherpaOnnxSpeakerEmbeddingExtractor *extractor) {
  69 + SherpaOnnxDestroySpeakerEmbeddingExtractor(extractor);
  70 + });
  71 +}
  72 +
  73 +static Napi::Number SpeakerEmbeddingExtractorDimWrapper(
  74 + const Napi::CallbackInfo &info) {
  75 + Napi::Env env = info.Env();
  76 +
  77 + if (info.Length() != 1) {
  78 + std::ostringstream os;
  79 + os << "Expect only 1 argument. Given: " << info.Length();
  80 +
  81 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  82 +
  83 + return {};
  84 + }
  85 +
  86 + if (!info[0].IsExternal()) {
  87 + Napi::TypeError::New(
  88 + env, "Argument 0 should be a speaker embedding extractor pointer.")
  89 + .ThrowAsJavaScriptException();
  90 +
  91 + return {};
  92 + }
  93 +
  94 + SherpaOnnxSpeakerEmbeddingExtractor *extractor =
  95 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
  96 +
  97 + int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);
  98 +
  99 + return Napi::Number::New(env, dim);
  100 +}
  101 +
  102 +static Napi::External<SherpaOnnxOnlineStream>
  103 +SpeakerEmbeddingExtractorCreateStreamWrapper(const Napi::CallbackInfo &info) {
  104 + Napi::Env env = info.Env();
  105 + if (info.Length() != 1) {
  106 + std::ostringstream os;
  107 + os << "Expect only 1 argument. Given: " << info.Length();
  108 +
  109 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  110 +
  111 + return {};
  112 + }
  113 +
  114 + if (!info[0].IsExternal()) {
  115 + Napi::TypeError::New(env,
  116 + "You should pass a speaker embedding extractor "
  117 + "pointer as the only argument")
  118 + .ThrowAsJavaScriptException();
  119 +
  120 + return {};
  121 + }
  122 +
  123 + SherpaOnnxSpeakerEmbeddingExtractor *extractor =
  124 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
  125 +
  126 + const SherpaOnnxOnlineStream *stream =
  127 + SherpaOnnxSpeakerEmbeddingExtractorCreateStream(extractor);
  128 +
  129 + return Napi::External<SherpaOnnxOnlineStream>::New(
  130 + env, const_cast<SherpaOnnxOnlineStream *>(stream),
  131 + [](Napi::Env env, SherpaOnnxOnlineStream *stream) {
  132 + DestroyOnlineStream(stream);
  133 + });
  134 +}
  135 +
  136 +static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper(
  137 + const Napi::CallbackInfo &info) {
  138 + Napi::Env env = info.Env();
  139 + if (info.Length() != 2) {
  140 + std::ostringstream os;
  141 + os << "Expect only 2 arguments. Given: " << info.Length();
  142 +
  143 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  144 +
  145 + return {};
  146 + }
  147 +
  148 + if (!info[0].IsExternal()) {
  149 + Napi::TypeError::New(
  150 + env, "Argument 0 should be a speaker embedding extractor pointer.")
  151 + .ThrowAsJavaScriptException();
  152 +
  153 + return {};
  154 + }
  155 +
  156 + if (!info[1].IsExternal()) {
  157 + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
  158 + .ThrowAsJavaScriptException();
  159 +
  160 + return {};
  161 + }
  162 +
  163 + SherpaOnnxSpeakerEmbeddingExtractor *extractor =
  164 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
  165 +
  166 + SherpaOnnxOnlineStream *stream =
  167 + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
  168 +
  169 + int32_t is_ready =
  170 + SherpaOnnxSpeakerEmbeddingExtractorIsReady(extractor, stream);
  171 +
  172 + return Napi::Boolean::New(env, is_ready);
  173 +}
  174 +
  175 +static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper(
  176 + const Napi::CallbackInfo &info) {
  177 + Napi::Env env = info.Env();
  178 + if (info.Length() != 2) {
  179 + std::ostringstream os;
  180 + os << "Expect only 2 arguments. Given: " << info.Length();
  181 +
  182 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  183 +
  184 + return {};
  185 + }
  186 +
  187 + if (!info[0].IsExternal()) {
  188 + Napi::TypeError::New(
  189 + env, "Argument 0 should be a speaker embedding extractor pointer.")
  190 + .ThrowAsJavaScriptException();
  191 +
  192 + return {};
  193 + }
  194 +
  195 + if (!info[1].IsExternal()) {
  196 + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
  197 + .ThrowAsJavaScriptException();
  198 +
  199 + return {};
  200 + }
  201 +
  202 + SherpaOnnxSpeakerEmbeddingExtractor *extractor =
  203 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
  204 +
  205 + SherpaOnnxOnlineStream *stream =
  206 + info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();
  207 +
  208 + const float *v =
  209 + SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(extractor, stream);
  210 +
  211 + int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);
  212 +
  213 + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
  214 + env, const_cast<float *>(v), sizeof(float) * dim,
  215 + [](Napi::Env /*env*/, void *data) {
  216 + SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(
  217 + reinterpret_cast<float *>(data));
  218 + });
  219 +
  220 + return Napi::Float32Array::New(env, dim, arrayBuffer, 0);
  221 +}
  222 +
  223 +static Napi::External<SherpaOnnxSpeakerEmbeddingManager>
  224 +CreateSpeakerEmbeddingManagerWrapper(const Napi::CallbackInfo &info) {
  225 + Napi::Env env = info.Env();
  226 + if (info.Length() != 1) {
  227 + std::ostringstream os;
  228 + os << "Expect only 1 argument. Given: " << info.Length();
  229 +
  230 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  231 +
  232 + return {};
  233 + }
  234 +
  235 + if (!info[0].IsNumber()) {
  236 + Napi::TypeError::New(env,
  237 + "You should pass an integer as the only argument.")
  238 + .ThrowAsJavaScriptException();
  239 +
  240 + return {};
  241 + }
  242 +
  243 + int32_t dim = info[0].As<Napi::Number>().Int32Value();
  244 +
  245 + const SherpaOnnxSpeakerEmbeddingManager *manager =
  246 + SherpaOnnxCreateSpeakerEmbeddingManager(dim);
  247 +
  248 + if (!manager) {
  249 + Napi::TypeError::New(env, "Please check your input dim!")
  250 + .ThrowAsJavaScriptException();
  251 +
  252 + return {};
  253 + }
  254 +
  255 + return Napi::External<SherpaOnnxSpeakerEmbeddingManager>::New(
  256 + env, const_cast<SherpaOnnxSpeakerEmbeddingManager *>(manager),
  257 + [](Napi::Env env, SherpaOnnxSpeakerEmbeddingManager *manager) {
  258 + SherpaOnnxDestroySpeakerEmbeddingManager(manager);
  259 + });
  260 +}
  261 +
  262 +static Napi::Boolean SpeakerEmbeddingManagerAddWrapper(
  263 + const Napi::CallbackInfo &info) {
  264 + Napi::Env env = info.Env();
  265 + if (info.Length() != 2) {
  266 + std::ostringstream os;
  267 + os << "Expect only 2 arguments. Given: " << info.Length();
  268 +
  269 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  270 +
  271 + return {};
  272 + }
  273 +
  274 + if (!info[0].IsExternal()) {
  275 + Napi::TypeError::New(env,
  276 + "You should pass a speaker embedding manager pointer "
  277 + "as the first argument.")
  278 + .ThrowAsJavaScriptException();
  279 +
  280 + return {};
  281 + }
  282 +
  283 + if (!info[1].IsObject()) {
  284 + Napi::TypeError::New(env, "Argument 1 should be an object")
  285 + .ThrowAsJavaScriptException();
  286 +
  287 + return {};
  288 + }
  289 +
  290 + SherpaOnnxSpeakerEmbeddingManager *manager =
  291 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  292 +
  293 + Napi::Object obj = info[1].As<Napi::Object>();
  294 +
  295 + if (!obj.Has("v")) {
  296 + Napi::TypeError::New(env, "The argument object should have a field v")
  297 + .ThrowAsJavaScriptException();
  298 +
  299 + return {};
  300 + }
  301 +
  302 + if (!obj.Get("v").IsTypedArray()) {
  303 + Napi::TypeError::New(env, "The object['v'] should be a typed array")
  304 + .ThrowAsJavaScriptException();
  305 +
  306 + return {};
  307 + }
  308 +
  309 + if (!obj.Has("name")) {
  310 + Napi::TypeError::New(env, "The argument object should have a field name")
  311 + .ThrowAsJavaScriptException();
  312 +
  313 + return {};
  314 + }
  315 +
  316 + if (!obj.Get("name").IsString()) {
  317 + Napi::TypeError::New(env, "The object['name'] should be a string")
  318 + .ThrowAsJavaScriptException();
  319 +
  320 + return {};
  321 + }
  322 +
  323 + Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  324 + Napi::String js_name = obj.Get("name").As<Napi::String>();
  325 + std::string name = js_name.Utf8Value();
  326 +
  327 + int32_t ok =
  328 + SherpaOnnxSpeakerEmbeddingManagerAdd(manager, name.c_str(), v.Data());
  329 + return Napi::Boolean::New(env, ok);
  330 +}
  331 +
  332 +static Napi::Boolean SpeakerEmbeddingManagerAddListFlattenedWrapper(
  333 + const Napi::CallbackInfo &info) {
  334 + Napi::Env env = info.Env();
  335 + if (info.Length() != 2) {
  336 + std::ostringstream os;
  337 + os << "Expect only 2 arguments. Given: " << info.Length();
  338 +
  339 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  340 +
  341 + return {};
  342 + }
  343 +
  344 + if (!info[0].IsExternal()) {
  345 + Napi::TypeError::New(env,
  346 + "You should pass a speaker embedding manager pointer "
  347 + "as the first argument.")
  348 + .ThrowAsJavaScriptException();
  349 +
  350 + return {};
  351 + }
  352 +
  353 + if (!info[1].IsObject()) {
  354 + Napi::TypeError::New(env, "Argument 1 should be an object")
  355 + .ThrowAsJavaScriptException();
  356 +
  357 + return {};
  358 + }
  359 +
  360 + SherpaOnnxSpeakerEmbeddingManager *manager =
  361 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  362 +
  363 + Napi::Object obj = info[1].As<Napi::Object>();
  364 +
  365 + if (!obj.Has("vv")) {
  366 + Napi::TypeError::New(env, "The argument object should have a field vv")
  367 + .ThrowAsJavaScriptException();
  368 +
  369 + return {};
  370 + }
  371 +
  372 + if (!obj.Get("vv").IsTypedArray()) {
  373 + Napi::TypeError::New(env, "The object['vv'] should be a typed array")
  374 + .ThrowAsJavaScriptException();
  375 +
  376 + return {};
  377 + }
  378 +
  379 + if (!obj.Has("name")) {
  380 + Napi::TypeError::New(env, "The argument object should have a field name")
  381 + .ThrowAsJavaScriptException();
  382 +
  383 + return {};
  384 + }
  385 +
  386 + if (!obj.Get("name").IsString()) {
  387 + Napi::TypeError::New(env, "The object['name'] should be a string")
  388 + .ThrowAsJavaScriptException();
  389 +
  390 + return {};
  391 + }
  392 +
  393 + if (!obj.Has("n")) {
  394 + Napi::TypeError::New(env, "The argument object should have a field n")
  395 + .ThrowAsJavaScriptException();
  396 +
  397 + return {};
  398 + }
  399 +
  400 + if (!obj.Get("n").IsNumber()) {
  401 + Napi::TypeError::New(env, "The object['n'] should be an integer")
  402 + .ThrowAsJavaScriptException();
  403 +
  404 + return {};
  405 + }
  406 +
  407 + Napi::Float32Array v = obj.Get("vv").As<Napi::Float32Array>();
  408 + Napi::String js_name = obj.Get("name").As<Napi::String>();
  409 + int32_t n = obj.Get("n").As<Napi::Number>().Int32Value();
  410 +
  411 + std::string name = js_name.Utf8Value();
  412 +
  413 + int32_t ok = SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
  414 + manager, name.c_str(), v.Data(), n);
  415 +
  416 + return Napi::Boolean::New(env, ok);
  417 +}
  418 +
  419 +static Napi::Boolean SpeakerEmbeddingManagerRemoveWrapper(
  420 + const Napi::CallbackInfo &info) {
  421 + Napi::Env env = info.Env();
  422 + if (info.Length() != 2) {
  423 + std::ostringstream os;
  424 + os << "Expect only 2 arguments. Given: " << info.Length();
  425 +
  426 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  427 +
  428 + return {};
  429 + }
  430 +
  431 + if (!info[0].IsExternal()) {
  432 + Napi::TypeError::New(env,
  433 + "You should pass a speaker embedding manager pointer "
  434 + "as the first argument.")
  435 + .ThrowAsJavaScriptException();
  436 +
  437 + return {};
  438 + }
  439 +
  440 + if (!info[1].IsString()) {
  441 + Napi::TypeError::New(env, "Argument 1 should be string")
  442 + .ThrowAsJavaScriptException();
  443 +
  444 + return {};
  445 + }
  446 +
  447 + SherpaOnnxSpeakerEmbeddingManager *manager =
  448 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  449 +
  450 + Napi::String js_name = info[1].As<Napi::String>();
  451 + std::string name = js_name.Utf8Value();
  452 +
  453 + int32_t ok = SherpaOnnxSpeakerEmbeddingManagerRemove(manager, name.c_str());
  454 +
  455 + return Napi::Boolean::New(env, ok);
  456 +}
  457 +
  458 +static Napi::String SpeakerEmbeddingManagerSearchWrapper(
  459 + const Napi::CallbackInfo &info) {
  460 + Napi::Env env = info.Env();
  461 + if (info.Length() != 2) {
  462 + std::ostringstream os;
  463 + os << "Expect only 2 arguments. Given: " << info.Length();
  464 +
  465 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  466 +
  467 + return {};
  468 + }
  469 +
  470 + if (!info[0].IsExternal()) {
  471 + Napi::TypeError::New(env,
  472 + "You should pass a speaker embedding manager pointer "
  473 + "as the first argument.")
  474 + .ThrowAsJavaScriptException();
  475 +
  476 + return {};
  477 + }
  478 +
  479 + if (!info[1].IsObject()) {
  480 + Napi::TypeError::New(env, "Argument 1 should be an object")
  481 + .ThrowAsJavaScriptException();
  482 +
  483 + return {};
  484 + }
  485 +
  486 + SherpaOnnxSpeakerEmbeddingManager *manager =
  487 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  488 +
  489 + Napi::Object obj = info[1].As<Napi::Object>();
  490 +
  491 + if (!obj.Has("v")) {
  492 + Napi::TypeError::New(env, "The argument object should have a field v")
  493 + .ThrowAsJavaScriptException();
  494 +
  495 + return {};
  496 + }
  497 +
  498 + if (!obj.Get("v").IsTypedArray()) {
  499 + Napi::TypeError::New(env, "The object['v'] should be a typed array")
  500 + .ThrowAsJavaScriptException();
  501 +
  502 + return {};
  503 + }
  504 +
  505 + if (!obj.Has("threshold")) {
  506 + Napi::TypeError::New(env,
  507 + "The argument object should have a field threshold")
  508 + .ThrowAsJavaScriptException();
  509 +
  510 + return {};
  511 + }
  512 +
  513 + if (!obj.Get("threshold").IsNumber()) {
  514 + Napi::TypeError::New(env, "The object['threshold'] should be a float")
  515 + .ThrowAsJavaScriptException();
  516 +
  517 + return {};
  518 + }
  519 +
  520 + Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  521 + float threshold = obj.Get("threshold").As<Napi::Number>().FloatValue();
  522 +
  523 + const char *name =
  524 + SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v.Data(), threshold);
  525 + const char *p = name;
  526 + if (!p) {
  527 + p = "";
  528 + }
  529 +
  530 + Napi::String js_name = Napi::String::New(env, p);
  531 + SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name);
  532 +
  533 + return js_name;
  534 +}
  535 +
  536 +static Napi::Boolean SpeakerEmbeddingManagerVerifyWrapper(
  537 + const Napi::CallbackInfo &info) {
  538 + Napi::Env env = info.Env();
  539 + if (info.Length() != 2) {
  540 + std::ostringstream os;
  541 + os << "Expect only 2 arguments. Given: " << info.Length();
  542 +
  543 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  544 +
  545 + return {};
  546 + }
  547 +
  548 + if (!info[0].IsExternal()) {
  549 + Napi::TypeError::New(env,
  550 + "You should pass a speaker embedding manager pointer "
  551 + "as the first argument.")
  552 + .ThrowAsJavaScriptException();
  553 +
  554 + return {};
  555 + }
  556 +
  557 + if (!info[1].IsObject()) {
  558 + Napi::TypeError::New(env, "Argument 1 should be an object")
  559 + .ThrowAsJavaScriptException();
  560 +
  561 + return {};
  562 + }
  563 +
  564 + SherpaOnnxSpeakerEmbeddingManager *manager =
  565 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  566 +
  567 + Napi::Object obj = info[1].As<Napi::Object>();
  568 +
  569 + if (!obj.Has("v")) {
  570 + Napi::TypeError::New(env, "The argument object should have a field v")
  571 + .ThrowAsJavaScriptException();
  572 +
  573 + return {};
  574 + }
  575 +
  576 + if (!obj.Get("v").IsTypedArray()) {
  577 + Napi::TypeError::New(env, "The object['v'] should be a typed array")
  578 + .ThrowAsJavaScriptException();
  579 +
  580 + return {};
  581 + }
  582 +
  583 + if (!obj.Has("threshold")) {
  584 + Napi::TypeError::New(env,
  585 + "The argument object should have a field threshold")
  586 + .ThrowAsJavaScriptException();
  587 +
  588 + return {};
  589 + }
  590 +
  591 + if (!obj.Get("threshold").IsNumber()) {
  592 + Napi::TypeError::New(env, "The object['threshold'] should be a float")
  593 + .ThrowAsJavaScriptException();
  594 +
  595 + return {};
  596 + }
  597 +
  598 + if (!obj.Has("name")) {
  599 + Napi::TypeError::New(env, "The argument object should have a field name")
  600 + .ThrowAsJavaScriptException();
  601 +
  602 + return {};
  603 + }
  604 +
  605 + if (!obj.Get("name").IsString()) {
  606 + Napi::TypeError::New(env, "The object['name'] should be a string")
  607 + .ThrowAsJavaScriptException();
  608 +
  609 + return {};
  610 + }
  611 +
  612 + Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  613 + float threshold = obj.Get("threshold").As<Napi::Number>().FloatValue();
  614 +
  615 + Napi::String js_name = obj.Get("name").As<Napi::String>();
  616 + std::string name = js_name.Utf8Value();
  617 +
  618 + int32_t found = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, name.c_str(),
  619 + v.Data(), threshold);
  620 +
  621 + return Napi::Boolean::New(env, found);
  622 +}
  623 +
  624 +static Napi::Boolean SpeakerEmbeddingManagerContainsWrapper(
  625 + const Napi::CallbackInfo &info) {
  626 + Napi::Env env = info.Env();
  627 + if (info.Length() != 2) {
  628 + std::ostringstream os;
  629 + os << "Expect only 2 arguments. Given: " << info.Length();
  630 +
  631 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  632 +
  633 + return {};
  634 + }
  635 +
  636 + if (!info[0].IsExternal()) {
  637 + Napi::TypeError::New(env,
  638 + "You should pass a speaker embedding manager pointer "
  639 + "as the first argument.")
  640 + .ThrowAsJavaScriptException();
  641 +
  642 + return {};
  643 + }
  644 +
  645 + if (!info[1].IsString()) {
  646 + Napi::TypeError::New(env, "Argument 1 should be a string")
  647 + .ThrowAsJavaScriptException();
  648 +
  649 + return {};
  650 + }
  651 +
  652 + SherpaOnnxSpeakerEmbeddingManager *manager =
  653 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  654 +
  655 + Napi::String js_name = info[1].As<Napi::String>();
  656 + std::string name = js_name.Utf8Value();
  657 +
  658 + int32_t exists =
  659 + SherpaOnnxSpeakerEmbeddingManagerContains(manager, name.c_str());
  660 +
  661 + return Napi::Boolean::New(env, exists);
  662 +}
  663 +
  664 +static Napi::Number SpeakerEmbeddingManagerNumSpeakersWrapper(
  665 + const Napi::CallbackInfo &info) {
  666 + Napi::Env env = info.Env();
  667 + if (info.Length() != 1) {
  668 + std::ostringstream os;
  669 + os << "Expect only 1 argument. Given: " << info.Length();
  670 +
  671 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  672 +
  673 + return {};
  674 + }
  675 +
  676 + if (!info[0].IsExternal()) {
  677 + Napi::TypeError::New(env,
  678 + "You should pass a speaker embedding manager pointer "
  679 + "as the first argument.")
  680 + .ThrowAsJavaScriptException();
  681 +
  682 + return {};
  683 + }
  684 +
  685 + SherpaOnnxSpeakerEmbeddingManager *manager =
  686 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  687 +
  688 + int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);
  689 +
  690 + return Napi::Number::New(env, num_speakers);
  691 +}
  692 +
  693 +static Napi::Array SpeakerEmbeddingManagerGetAllSpeakersWrapper(
  694 + const Napi::CallbackInfo &info) {
  695 + Napi::Env env = info.Env();
  696 + if (info.Length() != 1) {
  697 + std::ostringstream os;
  698 + os << "Expect only 1 argument. Given: " << info.Length();
  699 +
  700 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  701 +
  702 + return {};
  703 + }
  704 +
  705 + if (!info[0].IsExternal()) {
  706 + Napi::TypeError::New(env,
  707 + "You should pass a speaker embedding manager pointer "
  708 + "as the first argument.")
  709 + .ThrowAsJavaScriptException();
  710 +
  711 + return {};
  712 + }
  713 +
  714 + SherpaOnnxSpeakerEmbeddingManager *manager =
  715 + info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  716 +
  717 + int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);
  718 + if (num_speakers == 0) {
  719 + return {};
  720 + }
  721 +
  722 + const char *const *all_speaker_names =
  723 + SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);
  724 +
  725 + Napi::Array ans = Napi::Array::New(env, num_speakers);
  726 + for (int32_t i = 0; i != num_speakers; ++i) {
  727 + ans[i] = Napi::String::New(env, all_speaker_names[i]);
  728 + }
  729 + SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speaker_names);
  730 + return ans;
  731 +}
  732 +
  733 +void InitSpeakerID(Napi::Env env, Napi::Object exports) {
  734 + exports.Set(Napi::String::New(env, "createSpeakerEmbeddingExtractor"),
  735 + Napi::Function::New(env, CreateSpeakerEmbeddingExtractorWrapper));
  736 +
  737 + exports.Set(Napi::String::New(env, "speakerEmbeddingExtractorDim"),
  738 + Napi::Function::New(env, SpeakerEmbeddingExtractorDimWrapper));
  739 +
  740 + exports.Set(
  741 + Napi::String::New(env, "speakerEmbeddingExtractorCreateStream"),
  742 + Napi::Function::New(env, SpeakerEmbeddingExtractorCreateStreamWrapper));
  743 +
  744 + exports.Set(
  745 + Napi::String::New(env, "speakerEmbeddingExtractorIsReady"),
  746 + Napi::Function::New(env, SpeakerEmbeddingExtractorIsReadyWrapper));
  747 +
  748 + exports.Set(
  749 + Napi::String::New(env, "speakerEmbeddingExtractorComputeEmbedding"),
  750 + Napi::Function::New(env,
  751 + SpeakerEmbeddingExtractorComputeEmbeddingWrapper));
  752 +
  753 + exports.Set(Napi::String::New(env, "createSpeakerEmbeddingManager"),
  754 + Napi::Function::New(env, CreateSpeakerEmbeddingManagerWrapper));
  755 +
  756 + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerAdd"),
  757 + Napi::Function::New(env, SpeakerEmbeddingManagerAddWrapper));
  758 +
  759 + exports.Set(
  760 + Napi::String::New(env, "speakerEmbeddingManagerAddListFlattened"),
  761 + Napi::Function::New(env, SpeakerEmbeddingManagerAddListFlattenedWrapper));
  762 +
  763 + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerRemove"),
  764 + Napi::Function::New(env, SpeakerEmbeddingManagerRemoveWrapper));
  765 +
  766 + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerSearch"),
  767 + Napi::Function::New(env, SpeakerEmbeddingManagerSearchWrapper));
  768 +
  769 + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerVerify"),
  770 + Napi::Function::New(env, SpeakerEmbeddingManagerVerifyWrapper));
  771 +
  772 + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerContains"),
  773 + Napi::Function::New(env, SpeakerEmbeddingManagerContainsWrapper));
  774 +
  775 + exports.Set(
  776 + Napi::String::New(env, "speakerEmbeddingManagerNumSpeakers"),
  777 + Napi::Function::New(env, SpeakerEmbeddingManagerNumSpeakersWrapper));
  778 +
  779 + exports.Set(
  780 + Napi::String::New(env, "speakerEmbeddingManagerGetAllSpeakers"),
  781 + Napi::Function::New(env, SpeakerEmbeddingManagerGetAllSpeakersWrapper));
  782 +}