Fangjun Kuang
Committed by GitHub

Add speaker identification APIs for node-addon-api (#874)

... ... @@ -18,7 +18,7 @@ fi
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
# SHERPA_ONNX_VERSION=1.0.20
# SHERPA_ONNX_VERSION=1.0.21
if [ -z $owner ]; then
owner=k2-fsa
... ...
... ... @@ -6,6 +6,16 @@ d=nodejs-addon-examples
echo "dir: $d"
cd $d
echo "----------speaker identification----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
git clone https://github.com/csukuangfj/sr-data
node ./test_speaker_identification.js
rm *.onnx
rm -rf sr-data
echo "----------spoken language identification----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
... ...
name: npm-addon-linux-aarch64
on:
push:
branches:
- node-addon
workflow_dispatch:
concurrency:
... ...
name: npm-addon-linux-x64
on:
push:
branches:
- node-addon
workflow_dispatch:
concurrency:
... ...
name: npm-addon-macos
on:
push:
branches:
- node-addon
workflow_dispatch:
concurrency:
... ...
name: npm-addon-win-x64
on:
push:
branches:
- node-addon
workflow_dispatch:
concurrency:
... ...
name: npm-addon
on:
push:
branches:
- node-addon
workflow_dispatch:
concurrency:
... ... @@ -52,7 +55,7 @@ jobs:
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
# SHERPA_ONNX_VERSION=1.0.20
# SHERPA_ONNX_VERSION=1.0.21
src_dir=.github/scripts/node-addon
sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json
... ...
... ... @@ -201,3 +201,16 @@ node ./test_spoken_language_identification.js
npm install naudiodon2
node ./test_vad_spoken_language_identification_microphone.js
```
## Speaker identification
You can find more models at
<https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models>
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
git clone https://github.com/csukuangfj/sr-data
node ./test_speaker_identification.js
```
... ...
... ... @@ -2,7 +2,6 @@
const sherpa_onnx = require('sherpa-onnx-node');
const performance = require('perf_hooks').performance;
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
... ...
// Copyright (c) 2024 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');
const assert = require('node:assert');
// Please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
/**
 * Build the speaker embedding extractor used by this example.
 *
 * The model file is expected in the current working directory; the extractor
 * runs with a single thread and with debug output enabled.
 *
 * @returns {sherpa_onnx.SpeakerEmbeddingExtractor} a new extractor instance
 */
function createSpeakerEmbeddingExtractor() {
  return new sherpa_onnx.SpeakerEmbeddingExtractor({
    model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx',
    numThreads: 1,
    debug: true,
  });
}
/**
 * Compute the speaker embedding of a single wave file.
 *
 * @param extractor object providing createStream() and compute(stream)
 * @param {string} filename path of the wave file to read
 * @returns whatever extractor.compute() returns for the filled stream
 */
function computeEmbedding(extractor, filename) {
  const stream = extractor.createStream();
  const {sampleRate, samples} = sherpa_onnx.readWave(filename);
  stream.acceptWaveform({sampleRate, samples});
  return extractor.compute(stream);
}
const extractor = createSpeakerEmbeddingExtractor();
const manager = new sherpa_onnx.SpeakerEmbeddingManager(extractor.dim);

// Please download test files from
// https://github.com/csukuangfj/sr-data

// Enrollment recordings, several per speaker.
const spk1Files = [
  './sr-data/enroll/fangjun-sr-1.wav',
  './sr-data/enroll/fangjun-sr-2.wav',
  './sr-data/enroll/fangjun-sr-3.wav',
];
const spk2Files = [
  './sr-data/enroll/leijun-sr-1.wav',
  './sr-data/enroll/leijun-sr-2.wav',
];

// One embedding per enrollment file, computed in file order.
const spk1Vec = spk1Files.map((f) => computeEmbedding(extractor, f));
const spk2Vec = spk2Files.map((f) => computeEmbedding(extractor, f));

// Register both speakers and check the manager's bookkeeping.
const addedSpk1 = manager.addMulti({name: 'fangjun', v: spk1Vec});
assert.equal(addedSpk1, true);
const addedSpk2 = manager.addMulti({name: 'leijun', v: spk2Vec});
assert.equal(addedSpk2, true);

assert.equal(manager.getNumSpeakers(), 2);
assert.equal(manager.contains('fangjun'), true);
assert.equal(manager.contains('leijun'), true);

console.log('---All speakers---');
console.log(manager.getAllSpeakerNames());
console.log('------------');

const testFiles = [
  './sr-data/test/fangjun-test-sr-1.wav',
  './sr-data/test/leijun-test-sr-1.wav',
  './sr-data/test/liudehua-test-sr-1.wav',
];
const threshold = 0.6;

// search() yields an empty string when no enrolled speaker matches.
for (const f of testFiles) {
  const found = manager.search({v: computeEmbedding(extractor, f), threshold});
  const name = found === '' ? '<Unknown>' : found;
  console.log(`${f}: ${name}`);
}

// The first test file must verify as 'fangjun' while that speaker is enrolled.
const verifiedBeforeRemoval = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold,
});
assert.equal(verifiedBeforeRemoval, true);

const removed = manager.remove('fangjun');
assert.equal(removed, true);

// After removal the same recording must no longer verify.
const verifiedAfterRemoval = manager.verify({
  name: 'fangjun',
  v: computeEmbedding(extractor, testFiles[0]),
  threshold,
});
assert.equal(verifiedAfterRemoval, false);
assert.equal(manager.getNumSpeakers(), 1);
... ...
... ... @@ -21,6 +21,7 @@ set(srcs
src/non-streaming-asr.cc
src/non-streaming-tts.cc
src/sherpa-onnx-node-addon-api.cc
src/speaker-identification.cc
src/spoken-language-identification.cc
src/streaming-asr.cc
src/vad.cc
... ...
... ... @@ -4,6 +4,7 @@ const non_streaming_asr = require('./non-streaming-asr.js');
const non_streaming_tts = require('./non-streaming-tts.js');
const vad = require('./vad.js');
const slid = require('./spoken-language-identification.js');
const sid = require('./speaker-identification.js');
module.exports = {
OnlineRecognizer: streaming_asr.OnlineRecognizer,
... ... @@ -15,4 +16,6 @@ module.exports = {
Vad: vad.Vad,
CircularBuffer: vad.CircularBuffer,
SpokenLanguageIdentification: slid.SpokenLanguageIdentification,
SpeakerEmbeddingExtractor: sid.SpeakerEmbeddingExtractor,
SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
}
... ...
const addon = require('./addon.js');
const streaming_asr = require('./streaming-asr.js');
/**
 * Thin JavaScript wrapper around the native speaker embedding extractor.
 *
 * Properties set by the constructor:
 *  - handle: the native extractor returned by the addon
 *  - config: the config object passed in by the caller
 *  - dim:    the embedding dimension reported by the addon
 */
class SpeakerEmbeddingExtractor {
  /**
   * @param config configuration object forwarded unchanged to the addon
   */
  constructor(config) {
    const handle = addon.createSpeakerEmbeddingExtractor(config);
    this.handle = handle;
    this.config = config;
    this.dim = addon.speakerEmbeddingExtractorDim(handle);
  }

  /**
   * Create a stream that audio can be fed into for this extractor.
   * @returns {streaming_asr.OnlineStream} wrapper around the native stream
   */
  createStream() {
    const streamHandle =
        addon.speakerEmbeddingExtractorCreateStream(this.handle);
    return new streaming_asr.OnlineStream(streamHandle);
  }

  /**
   * Ask the addon whether the given stream is ready for compute().
   * @param stream an object exposing the native stream as `stream.handle`
   */
  isReady(stream) {
    const {handle: streamHandle} = stream;
    return addon.speakerEmbeddingExtractorIsReady(this.handle, streamHandle);
  }

  /**
   * Compute the embedding of the audio accepted by `stream`.
   * @param stream an object exposing the native stream as `stream.handle`
   * @returns a float32 array
   */
  compute(stream) {
    const {handle: streamHandle} = stream;
    return addon.speakerEmbeddingExtractorComputeEmbedding(
        this.handle, streamHandle);
  }
}
/**
 * Concatenate a list of numeric arrays into a single Float32Array.
 *
 * @param arrayList list of arrays / typed arrays, each with a `length`
 * @returns {Float32Array} all elements of all entries, in list order
 */
function flatten(arrayList) {
  // First pass: total number of elements, so the result is allocated once.
  let total = 0;
  for (const chunk of arrayList) {
    total += chunk.length;
  }

  // Second pass: copy every chunk right behind the previous one.
  const flat = new Float32Array(total);
  let writePos = 0;
  for (const chunk of arrayList) {
    flat.set(chunk, writePos);
    writePos += chunk.length;
  }
  return flat;
}
/**
 * Throw if an embedding does not have exactly `dim` elements.
 *
 * The native addon is handed only the raw data of the array, so a vector of
 * the wrong size must be rejected here, before it crosses into native code.
 * Values without a numeric `length` (e.g. undefined) are passed through so
 * that the addon's own argument checks still report them.
 *
 * @param v the embedding to check
 * @param {number} dim the expected number of elements
 * @param {string} what label used in the error message
 * @throws {RangeError} when v.length is a number different from dim
 */
function checkEmbeddingDim(v, dim, what) {
  if (v != null && typeof v.length === 'number' && v.length !== dim) {
    throw new RangeError(
        `${what} has ${v.length} elements. Expected: ${dim}`);
  }
}

/**
 * JavaScript wrapper around the native speaker embedding manager.
 *
 * Properties set by the constructor:
 *  - handle: the native manager returned by the addon
 *  - dim:    the embedding dimension the manager was created with
 */
class SpeakerEmbeddingManager {
  /**
   * @param {number} dim dimension of the embeddings to be managed
   */
  constructor(dim) {
    this.handle = addon.createSpeakerEmbeddingManager(dim);
    this.dim = dim;
  }

  /*
   obj = {name: "xxx", v: a-float32-array}
   Throws RangeError if obj.v does not have this.dim elements.
   */
  add(obj) {
    checkEmbeddingDim(obj == null ? undefined : obj.v, this.dim, 'obj.v');
    return addon.speakerEmbeddingManagerAdd(this.handle, obj);
  }

  /*
   * obj =
   * {name: "xxx", v: [float32_array1, float32_array2, ..., float32_arrayn]
   * Throws RangeError if any obj.v[i] does not have this.dim elements.
   */
  addMulti(obj) {
    for (let i = 0; i < obj.v.length; ++i) {
      checkEmbeddingDim(obj.v[i], this.dim, `obj.v[${i}]`);
    }

    const c = {
      name: obj.name,
      vv: flatten(obj.v),
      n: obj.v.length,
    };
    return addon.speakerEmbeddingManagerAddListFlattened(this.handle, c);
  }

  remove(name) {
    return addon.speakerEmbeddingManagerRemove(this.handle, name);
  }

  /*
   * obj = {v: a-float32-array, threshold: a-float }
   * Throws RangeError if obj.v does not have this.dim elements.
   */
  search(obj) {
    checkEmbeddingDim(obj == null ? undefined : obj.v, this.dim, 'obj.v');
    return addon.speakerEmbeddingManagerSearch(this.handle, obj);
  }

  /*
   * obj = {name: 'xxx', v: a-float32-array, threshold: a-float }
   * Throws RangeError if obj.v does not have this.dim elements.
   */
  verify(obj) {
    checkEmbeddingDim(obj == null ? undefined : obj.v, this.dim, 'obj.v');
    return addon.speakerEmbeddingManagerVerify(this.handle, obj);
  }

  contains(name) {
    return addon.speakerEmbeddingManagerContains(this.handle, name);
  }

  getNumSpeakers() {
    return addon.speakerEmbeddingManagerNumSpeakers(this.handle);
  }

  getAllSpeakerNames() {
    return addon.speakerEmbeddingManagerGetAllSpeakers(this.handle);
  }
}
module.exports = {
SpeakerEmbeddingExtractor,
SpeakerEmbeddingManager,
}
... ...
... ... @@ -64,5 +64,6 @@ class OnlineRecognizer {
module.exports = {
OnlineRecognizer,
OnlineStream,
Display
}
... ...
... ... @@ -17,6 +17,8 @@ void InitWaveWriter(Napi::Env env, Napi::Object exports);
void InitSpokenLanguageID(Napi::Env env, Napi::Object exports);
void InitSpeakerID(Napi::Env env, Napi::Object exports);
Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitStreamingAsr(env, exports);
InitNonStreamingAsr(env, exports);
... ... @@ -25,6 +27,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitWaveReader(env, exports);
InitWaveWriter(env, exports);
InitSpokenLanguageID(env, exports);
InitSpeakerID(env, exports);
return exports;
}
... ...
// scripts/node-addon-api/src/speaker-identification.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <algorithm>
#include <sstream>

#include "macros.h" // NOLINT
#include "napi.h" // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
// Creates a native speaker embedding extractor from a JS config object.
//
// Expects exactly one argument, an object. The keys passed to the helper
// macros / read below are `model`, `numThreads`, `debug` and `provider`.
//
// On success returns an External wrapping the extractor; the callback given
// to External::New calls SherpaOnnxDestroySpeakerEmbeddingExtractor on it.
// On failure a JS TypeError is raised and an empty value is returned.
static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>
CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    const std::string msg =
        "Expect only 1 argument. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "You should pass an object as the only argument.")
        .ThrowAsJavaScriptException();
    return {};
  }

  // NOTE(review): the SHERPA_ONNX_ASSIGN_ATTR_* macros come from macros.h and
  // presumably refer to the locals named `o` and `c`; keep these names.
  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxSpeakerEmbeddingExtractorConfig c;
  memset(&c, 0, sizeof(c));

  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  // `debug` may be given either as a boolean or as a number.
  if (o.Has("debug")) {
    Napi::Value debug = o.Get("debug");
    if (debug.IsBoolean()) {
      c.debug = debug.As<Napi::Boolean>().Value();
    } else if (debug.IsNumber()) {
      c.debug = debug.As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
      SherpaOnnxCreateSpeakerEmbeddingExtractor(&c);

  // The config strings are no longer needed once the extractor is created.
  // delete[] on a null pointer is a no-op, so no guard is required.
  delete[] c.model;
  delete[] c.provider;

  if (extractor == nullptr) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();
    return {};
  }

  return Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>::New(
      env, const_cast<SherpaOnnxSpeakerEmbeddingExtractor *>(extractor),
      [](Napi::Env /*env*/, SherpaOnnxSpeakerEmbeddingExtractor *p) {
        SherpaOnnxDestroySpeakerEmbeddingExtractor(p);
      });
}
// Returns the embedding dimension of an extractor as a JS number.
//
// Expects exactly one argument: the External wrapping the extractor.
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Number SpeakerEmbeddingExtractorDimWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    const std::string msg =
        "Expect only 1 argument. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be a speaker embedding extractor pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  auto *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();

  const int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);
  return Napi::Number::New(env, dim);
}
// Creates an online stream for the given extractor.
//
// Expects exactly one argument: the External wrapping the extractor.
// Returns an External wrapping the new stream; the callback given to
// External::New calls DestroyOnlineStream on it.
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::External<SherpaOnnxOnlineStream>
SpeakerEmbeddingExtractorCreateStreamWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    const std::string msg =
        "Expect only 1 argument. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass a speaker embedding extractor pointer as the only "
        "argument")
        .ThrowAsJavaScriptException();
    return {};
  }

  auto *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();

  const SherpaOnnxOnlineStream *stream =
      SherpaOnnxSpeakerEmbeddingExtractorCreateStream(extractor);

  return Napi::External<SherpaOnnxOnlineStream>::New(
      env, const_cast<SherpaOnnxOnlineStream *>(stream),
      [](Napi::Env /*env*/, SherpaOnnxOnlineStream *p) {
        DestroyOnlineStream(p);
      });
}
// Reports whether the extractor considers the stream ready.
//
// Expects two arguments: the External wrapping the extractor and the
// External wrapping an online stream. Returns the native result as a JS
// boolean. On bad arguments a JS TypeError is raised and an empty value is
// returned.
static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 2) {
    const std::string msg =
        "Expect only 2 arguments. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be a speaker embedding extractor pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  auto *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();
  auto *stream = info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  const int32_t is_ready =
      SherpaOnnxSpeakerEmbeddingExtractorIsReady(extractor, stream);
  return Napi::Boolean::New(env, is_ready);
}
// Computes the speaker embedding for a stream and returns it to JS.
//
// Expects two arguments: the External wrapping the extractor and the
// External wrapping an online stream. Returns a Float32Array whose length is
// the extractor's embedding dimension.
//
// The embedding is copied into memory owned by the JS engine and the native
// buffer is released right away. Wrapping the native pointer in an external
// ArrayBuffer is avoided on purpose: runtimes that do not allow external
// buffers reject such ArrayBuffers.
//
// On bad arguments a JS TypeError is raised, on a failed computation a JS
// Error is raised; in both cases an empty value is returned.
static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be a speaker embedding extractor pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsExternal()) {
    Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxSpeakerEmbeddingExtractor *extractor =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>>().Data();

  SherpaOnnxOnlineStream *stream =
      info[1].As<Napi::External<SherpaOnnxOnlineStream>>().Data();

  const float *v =
      SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(extractor, stream);

  if (!v) {
    Napi::Error::New(env, "Failed to compute the speaker embedding.")
        .ThrowAsJavaScriptException();

    return {};
  }

  int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor);

  // Allocate the result inside the JS heap and copy the embedding into it.
  Napi::Float32Array embedding = Napi::Float32Array::New(env, dim);
  std::copy(v, v + dim, embedding.Data());

  // The copy is complete, so the native buffer can be freed immediately.
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v);

  return embedding;
}
// Creates a native speaker embedding manager for embeddings of size `dim`.
//
// Expects exactly one argument, a number, which is converted to int32.
// Returns an External wrapping the manager; the callback given to
// External::New calls SherpaOnnxDestroySpeakerEmbeddingManager on it.
// On failure a JS TypeError is raised and an empty value is returned.
static Napi::External<SherpaOnnxSpeakerEmbeddingManager>
CreateSpeakerEmbeddingManagerWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    const std::string msg =
        "Expect only 1 argument. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsNumber()) {
    Napi::TypeError::New(env,
                         "You should pass an integer as the only argument.")
        .ThrowAsJavaScriptException();
    return {};
  }

  const int32_t dim = info[0].As<Napi::Number>().Int32Value();

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      SherpaOnnxCreateSpeakerEmbeddingManager(dim);
  if (manager == nullptr) {
    Napi::TypeError::New(env, "Please check your input dim!")
        .ThrowAsJavaScriptException();
    return {};
  }

  return Napi::External<SherpaOnnxSpeakerEmbeddingManager>::New(
      env, const_cast<SherpaOnnxSpeakerEmbeddingManager *>(manager),
      [](Napi::Env /*env*/, SherpaOnnxSpeakerEmbeddingManager *p) {
        SherpaOnnxDestroySpeakerEmbeddingManager(p);
      });
}
// Registers one embedding for a speaker.
//
// Expects two arguments: the External wrapping the manager and an object
// {name: string, v: Float32Array}. Returns the native result as a JS boolean.
//
// `v` must really be a Float32Array: its storage is handed to the C API as
// `float *`, so any other typed array would be reinterpreted with the wrong
// element type.
// NOTE(review): the length of `v` is not checked here; the caller is assumed
// to pass an embedding of the manager's dimension -- confirm.
//
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Boolean SpeakerEmbeddingManagerAddWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("v")) {
    Napi::TypeError::New(env, "The argument object should have a field v")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("v").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['v'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  // IsTypedArray() is also true for e.g. Uint8Array or Float64Array.
  if (obj.Get("v").As<Napi::TypedArray>().TypedArrayType() !=
      napi_float32_array) {
    Napi::TypeError::New(env, "The object['v'] should be a Float32Array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("name")) {
    Napi::TypeError::New(env, "The argument object should have a field name")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("name").IsString()) {
    Napi::TypeError::New(env, "The object['name'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  Napi::String js_name = obj.Get("name").As<Napi::String>();

  std::string name = js_name.Utf8Value();

  int32_t ok =
      SherpaOnnxSpeakerEmbeddingManagerAdd(manager, name.c_str(), v.Data());

  return Napi::Boolean::New(env, ok);
}
// Registers several embeddings for one speaker in a single call.
//
// Expects two arguments: the External wrapping the manager and an object
// {name: string, vv: Float32Array, n: number}, where `vv` holds `n`
// embeddings stored back to back. Returns the native result as a JS boolean.
//
// `vv` must really be a Float32Array because its storage is handed to the C
// API as `float *`. `n` must be positive and must divide the length of `vv`,
// otherwise the layout described by the caller cannot be correct.
// NOTE(review): that length(vv) / n equals the manager's dimension is not
// checked here; the caller is assumed to guarantee it -- confirm.
//
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Boolean SpeakerEmbeddingManagerAddListFlattenedWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("vv")) {
    Napi::TypeError::New(env, "The argument object should have a field vv")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("vv").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['vv'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  // IsTypedArray() is also true for e.g. Uint8Array or Float64Array.
  if (obj.Get("vv").As<Napi::TypedArray>().TypedArrayType() !=
      napi_float32_array) {
    Napi::TypeError::New(env, "The object['vv'] should be a Float32Array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("name")) {
    Napi::TypeError::New(env, "The argument object should have a field name")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("name").IsString()) {
    Napi::TypeError::New(env, "The object['name'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("n")) {
    Napi::TypeError::New(env, "The argument object should have a field n")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("n").IsNumber()) {
    Napi::TypeError::New(env, "The object['n'] should be an integer")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("vv").As<Napi::Float32Array>();
  Napi::String js_name = obj.Get("name").As<Napi::String>();
  int32_t n = obj.Get("n").As<Napi::Number>().Int32Value();

  if (n <= 0) {
    Napi::TypeError::New(env, "The object['n'] should be a positive integer")
        .ThrowAsJavaScriptException();

    return {};
  }

  // `vv` is described as n equally sized embeddings, so n must divide it.
  if (v.ElementLength() % static_cast<size_t>(n) != 0) {
    Napi::TypeError::New(
        env, "The length of object['vv'] should be a multiple of object['n']")
        .ThrowAsJavaScriptException();

    return {};
  }

  std::string name = js_name.Utf8Value();

  int32_t ok = SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
      manager, name.c_str(), v.Data(), n);

  return Napi::Boolean::New(env, ok);
}
// Removes a speaker from the manager by name.
//
// Expects two arguments: the External wrapping the manager and the speaker
// name as a string. Returns the native result as a JS boolean.
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Boolean SpeakerEmbeddingManagerRemoveWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 2) {
    const std::string msg =
        "Expect only 2 arguments. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass a speaker embedding manager pointer as the first "
        "argument.")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "Argument 1 should be string")
        .ThrowAsJavaScriptException();
    return {};
  }

  auto *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  const std::string name = info[1].As<Napi::String>().Utf8Value();

  const int32_t ok =
      SherpaOnnxSpeakerEmbeddingManagerRemove(manager, name.c_str());
  return Napi::Boolean::New(env, ok);
}
// Looks up the speaker matching an embedding.
//
// Expects two arguments: the External wrapping the manager and an object
// {v: Float32Array, threshold: number}. Returns the name reported by the C
// API, or an empty string when the C API returns a null pointer.
//
// `v` must really be a Float32Array: its storage is handed to the C API as
// `float *`, so any other typed array would be reinterpreted with the wrong
// element type.
// NOTE(review): the length of `v` is not checked here; the caller is assumed
// to pass an embedding of the manager's dimension -- confirm.
//
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::String SpeakerEmbeddingManagerSearchWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("v")) {
    Napi::TypeError::New(env, "The argument object should have a field v")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("v").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['v'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  // IsTypedArray() is also true for e.g. Uint8Array or Float64Array.
  if (obj.Get("v").As<Napi::TypedArray>().TypedArrayType() !=
      napi_float32_array) {
    Napi::TypeError::New(env, "The object['v'] should be a Float32Array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("threshold")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field threshold")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("threshold").IsNumber()) {
    Napi::TypeError::New(env, "The object['threshold'] should be a float")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  float threshold = obj.Get("threshold").As<Napi::Number>().FloatValue();

  const char *name =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v.Data(), threshold);

  // A null result is reported to JS as an empty string.
  const char *p = name;
  if (!p) {
    p = "";
  }

  // Copy into a JS string before the native string is released.
  Napi::String js_name = Napi::String::New(env, p);
  SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name);

  return js_name;
}
// Checks whether an embedding belongs to the named speaker.
//
// Expects two arguments: the External wrapping the manager and an object
// {name: string, v: Float32Array, threshold: number}. Returns the native
// result as a JS boolean.
//
// `v` must really be a Float32Array: its storage is handed to the C API as
// `float *`, so any other typed array would be reinterpreted with the wrong
// element type.
// NOTE(review): the length of `v` is not checked here; the caller is assumed
// to pass an embedding of the manager's dimension -- confirm.
//
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Boolean SpeakerEmbeddingManagerVerifyWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("v")) {
    Napi::TypeError::New(env, "The argument object should have a field v")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("v").IsTypedArray()) {
    Napi::TypeError::New(env, "The object['v'] should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  // IsTypedArray() is also true for e.g. Uint8Array or Float64Array.
  if (obj.Get("v").As<Napi::TypedArray>().TypedArrayType() !=
      napi_float32_array) {
    Napi::TypeError::New(env, "The object['v'] should be a Float32Array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("threshold")) {
    Napi::TypeError::New(env,
                         "The argument object should have a field threshold")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("threshold").IsNumber()) {
    Napi::TypeError::New(env, "The object['threshold'] should be a float")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("name")) {
    Napi::TypeError::New(env, "The argument object should have a field name")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("name").IsString()) {
    Napi::TypeError::New(env, "The object['name'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Float32Array v = obj.Get("v").As<Napi::Float32Array>();
  float threshold = obj.Get("threshold").As<Napi::Number>().FloatValue();

  Napi::String js_name = obj.Get("name").As<Napi::String>();
  std::string name = js_name.Utf8Value();

  int32_t found = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, name.c_str(),
                                                          v.Data(), threshold);

  return Napi::Boolean::New(env, found);
}
// Reports whether the manager has a speaker with the given name.
//
// Expects two arguments: the External wrapping the manager and the speaker
// name as a string. Returns the native result as a JS boolean.
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Boolean SpeakerEmbeddingManagerContainsWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 2) {
    const std::string msg =
        "Expect only 2 arguments. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass a speaker embedding manager pointer as the first "
        "argument.")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "Argument 1 should be a string")
        .ThrowAsJavaScriptException();
    return {};
  }

  auto *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();
  const std::string name = info[1].As<Napi::String>().Utf8Value();

  const int32_t exists =
      SherpaOnnxSpeakerEmbeddingManagerContains(manager, name.c_str());
  return Napi::Boolean::New(env, exists);
}
// Returns the number of speakers known to the manager as a JS number.
//
// Expects exactly one argument: the External wrapping the manager.
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Number SpeakerEmbeddingManagerNumSpeakersWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const auto num_args = info.Length();
  if (num_args != 1) {
    const std::string msg =
        "Expect only 1 argument. Given: " + std::to_string(num_args);
    Napi::TypeError::New(env, msg).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env,
        "You should pass a speaker embedding manager pointer as the first "
        "argument.")
        .ThrowAsJavaScriptException();
    return {};
  }

  auto *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  const int32_t num_speakers =
      SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);
  return Napi::Number::New(env, num_speakers);
}
// Returns the names of all speakers known to the manager as a JS array of
// strings.
//
// Expects exactly one argument: the External wrapping the manager.
//
// When the manager has no speakers (or the C API yields no name list) an
// empty JS array is returned, so callers always receive an array and can use
// `.length` or iterate without a special case.
//
// On bad arguments a JS TypeError is raised and an empty value is returned.
static Napi::Array SpeakerEmbeddingManagerGetAllSpeakersWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 1) {
    std::ostringstream os;
    os << "Expect only 1 argument. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env,
                         "You should pass a speaker embedding manager pointer "
                         "as the first argument.")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxSpeakerEmbeddingManager *manager =
      info[0].As<Napi::External<SherpaOnnxSpeakerEmbeddingManager>>().Data();

  int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager);
  if (num_speakers <= 0) {
    return Napi::Array::New(env, 0);
  }

  const char *const *all_speaker_names =
      SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);
  if (!all_speaker_names) {
    return Napi::Array::New(env, 0);
  }

  Napi::Array ans = Napi::Array::New(env, num_speakers);
  for (int32_t i = 0; i != num_speakers; ++i) {
    ans[i] = Napi::String::New(env, all_speaker_names[i]);
  }

  // The names were copied into JS strings above; release the native list.
  SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speaker_names);

  return ans;
}
// Registers every speaker identification wrapper of this file on `exports`.
//
// Each property name is the camelCase name under which JavaScript sees the
// corresponding wrapper function.
void InitSpeakerID(Napi::Env env, Napi::Object exports) {
  // Speaker embedding extractor.
  exports.Set("createSpeakerEmbeddingExtractor",
              Napi::Function::New(env, CreateSpeakerEmbeddingExtractorWrapper));
  exports.Set("speakerEmbeddingExtractorDim",
              Napi::Function::New(env, SpeakerEmbeddingExtractorDimWrapper));
  exports.Set(
      "speakerEmbeddingExtractorCreateStream",
      Napi::Function::New(env, SpeakerEmbeddingExtractorCreateStreamWrapper));
  exports.Set(
      "speakerEmbeddingExtractorIsReady",
      Napi::Function::New(env, SpeakerEmbeddingExtractorIsReadyWrapper));
  exports.Set("speakerEmbeddingExtractorComputeEmbedding",
              Napi::Function::New(
                  env, SpeakerEmbeddingExtractorComputeEmbeddingWrapper));

  // Speaker embedding manager.
  exports.Set("createSpeakerEmbeddingManager",
              Napi::Function::New(env, CreateSpeakerEmbeddingManagerWrapper));
  exports.Set("speakerEmbeddingManagerAdd",
              Napi::Function::New(env, SpeakerEmbeddingManagerAddWrapper));
  exports.Set(
      "speakerEmbeddingManagerAddListFlattened",
      Napi::Function::New(env, SpeakerEmbeddingManagerAddListFlattenedWrapper));
  exports.Set("speakerEmbeddingManagerRemove",
              Napi::Function::New(env, SpeakerEmbeddingManagerRemoveWrapper));
  exports.Set("speakerEmbeddingManagerSearch",
              Napi::Function::New(env, SpeakerEmbeddingManagerSearchWrapper));
  exports.Set("speakerEmbeddingManagerVerify",
              Napi::Function::New(env, SpeakerEmbeddingManagerVerifyWrapper));
  exports.Set("speakerEmbeddingManagerContains",
              Napi::Function::New(env, SpeakerEmbeddingManagerContainsWrapper));
  exports.Set(
      "speakerEmbeddingManagerNumSpeakers",
      Napi::Function::New(env, SpeakerEmbeddingManagerNumSpeakersWrapper));
  exports.Set(
      "speakerEmbeddingManagerGetAllSpeakers",
      Napi::Function::New(env, SpeakerEmbeddingManagerGetAllSpeakersWrapper));
}
... ...