Add C API for speaker embedding extractor. (#711)

Fangjun Kuang · GitHub
Commit 2e0bccad36502c7a985d84c88feef4ba4e8cd404 2e0bccad 1 parent 638f48f4
.github/scripts/test-c-api.sh
.github/workflows/linux.yaml
.github/workflows/macos.yaml
.github/workflows/windows-x64.yaml
.gitignore
c-api-examples/CMakeLists.txt
c-api-examples/asr-microphone-example/c-api-alsa.cc
c-api-examples/decode-file-c-api.c
c-api-examples/speaker-identification-c-api.c
c-api-examples/spoken-language-identification-c-api.c
ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
python-api-examples/offline-tts-play.py
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/offline-tts-vits-impl.h
sherpa-onnx/csrc/offline-tts.h
sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
--- a/.github/scripts/test-c-api.sh
查看文件 @2e0bcca
+++ b/.github/scripts/test-c-api.sh
查看文件 @2e0bcca
 #!/usr/bin/env bash
 
- set -e
+ set -ex
 
 log() {
   # This function is from espnet
@@ -9,6 +9,7 @@ log() {
 }
 
 echo "SLID_EXE is $SLID_EXE"
+ echo "SID_EXE is $SID_EXE"
 echo "PATH: $PATH"
 
 
@@ -24,3 +25,15 @@ rm sherpa-onnx-whisper-tiny.tar.bz2
 $SLID_EXE
 
 rm -rf sherpa-onnx-whisper-tiny*
+ 
+ log "------------------------------------------------------------"
+ log "Download file for speaker identification and verification   "
+ log "------------------------------------------------------------"
+ 
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
+ git clone https://github.com/csukuangfj/sr-data
+ 
+ $SID_EXE
+ 
+ rm -fv *.onnx
+ rm -rf sr-data
--- a/.github/workflows/linux.yaml
查看文件 @2e0bcca
+++ b/.github/workflows/linux.yaml
查看文件 @2e0bcca
@@ -124,11 +124,12 @@ jobs:
           name: release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
           path: build/bin/*
 
-       - name: Test spoken language identification (C API)
+       - name: Test C API
         shell: bash
         run: |
           export PATH=$PWD/build/bin:$PATH
           export SLID_EXE=spoken-language-identification-c-api
+           export SID_EXE=speaker-identification-c-api
 
           .github/scripts/test-c-api.sh
 
--- a/.github/workflows/macos.yaml
查看文件 @2e0bcca
+++ b/.github/workflows/macos.yaml
查看文件 @2e0bcca
@@ -103,11 +103,12 @@ jobs:
           otool -L build/bin/sherpa-onnx
           otool -l build/bin/sherpa-onnx
 
-       - name: Test spoken language identification (C API)
+       - name: Test C API
         shell: bash
         run: |
           export PATH=$PWD/build/bin:$PATH
           export SLID_EXE=spoken-language-identification-c-api
+           export SID_EXE=speaker-identification-c-api
 
           .github/scripts/test-c-api.sh
 
--- a/.github/workflows/windows-x64.yaml
查看文件 @2e0bcca
+++ b/.github/workflows/windows-x64.yaml
查看文件 @2e0bcca
@@ -70,11 +70,12 @@ jobs:
 
           ls -lh ./bin/Release/sherpa-onnx.exe
 
-       - name: Test spoken language identification (C API)
+       - name: Test C API
         shell: bash
         run: |
           export PATH=$PWD/build/bin/Release:$PATH
           export SLID_EXE=spoken-language-identification-c-api.exe
+           export SID_EXE=speaker-identification-c-api.exe
 
           .github/scripts/test-c-api.sh
 
--- a/.gitignore
查看文件 @2e0bcca
+++ b/.gitignore
查看文件 @2e0bcca
@@ -87,3 +87,4 @@ vits-coqui-*
 vits-mms-*
 *.tar.bz2
 sherpa-onnx-paraformer-trilingual-zh-cantonese-en
+ sr-data
--- a/c-api-examples/CMakeLists.txt
查看文件 @2e0bcca
+++ b/c-api-examples/CMakeLists.txt
查看文件 @2e0bcca
@@ -12,6 +12,9 @@ endif()
 add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
 target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
 
+ add_executable(speaker-identification-c-api speaker-identification-c-api.c)
+ target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
+ 
 if(SHERPA_ONNX_HAS_ALSA)
   add_subdirectory(./asr-microphone-example)
 elseif((UNIX AND NOT APPLE) OR LINUX)
--- a/c-api-examples/asr-microphone-example/c-api-alsa.cc
查看文件 @2e0bcca
+++ b/c-api-examples/asr-microphone-example/c-api-alsa.cc
查看文件 @2e0bcca
@@ -188,10 +188,11 @@ int32_t main(int32_t argc, char *argv[]) {
     }
   }
 
-   SherpaOnnxOnlineRecognizer *recognizer = CreateOnlineRecognizer(&config);
-   SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);
+   const SherpaOnnxOnlineRecognizer *recognizer =
+       CreateOnlineRecognizer(&config);
+   const SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);
 
-   SherpaOnnxDisplay *display = CreateDisplay(50);
+   const SherpaOnnxDisplay *display = CreateDisplay(50);
   int32_t segment_id = 0;
 
   const char *device_name = argv[context.index];
--- a/c-api-examples/decode-file-c-api.c
查看文件 @2e0bcca
+++ b/c-api-examples/decode-file-c-api.c
查看文件 @2e0bcca
@@ -162,10 +162,11 @@ int32_t main(int32_t argc, char *argv[]) {
     }
   }
 
-   SherpaOnnxOnlineRecognizer *recognizer = CreateOnlineRecognizer(&config);
-   SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);
+   const SherpaOnnxOnlineRecognizer *recognizer =
+       CreateOnlineRecognizer(&config);
+   const SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);
 
-   SherpaOnnxDisplay *display = CreateDisplay(50);
+   const SherpaOnnxDisplay *display = CreateDisplay(50);
   int32_t segment_id = 0;
 
   const char *wav_filename = argv[context.index];
--- a/c-api-examples/speaker-identification-c-api.c 0 → 100644
查看文件 @2e0bcca
+++ b/c-api-examples/speaker-identification-c-api.c 0 → 100644
查看文件 @2e0bcca
+ // c-api-examples/speaker-identification-c-api.c
+ //
+ // Copyright (c)  2024  Xiaomi Corporation
+ 
+ // We assume you have pre-downloaded the speaker embedding extractor model
+ // from
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+ //
+ // An example command to download
+ // "3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
+ // is given below:
+ //
+ // clang-format off
+ //
+ // wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
+ //
+ // clang-format on
+ //
+ // Also, please download the test wave files from
+ //
+ // https://github.com/csukuangfj/sr-data
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "sherpa-onnx/c-api/c-api.h"
+ 
+ static const float *ComputeEmbedding(
+     const SherpaOnnxSpeakerEmbeddingExtractor *ex, const char *wav_filename) {
+   const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+   if (wave == NULL) {
+     fprintf(stderr, "Failed to read %s\n", wav_filename);
+     exit(-1);
+   }
+ 
+   const SherpaOnnxOnlineStream *stream =
+       SherpaOnnxSpeakerEmbeddingExtractorCreateStream(ex);
+ 
+   AcceptWaveform(stream, wave->sample_rate, wave->samples, wave->num_samples);
+   InputFinished(stream);
+ 
+   if (!SherpaOnnxSpeakerEmbeddingExtractorIsReady(ex, stream)) {
+     fprintf(stderr, "The input wave file %s is too short!\n", wav_filename);
+     exit(-1);
+   }
+ 
+   // we will free `v` outside of this function
+   const float *v =
+       SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(ex, stream);
+ 
+   DestroyOnlineStream(stream);
+   SherpaOnnxFreeWave(wave);
+ 
+   // Remeber to free v to avoid memory leak
+   return v;
+ }
+ 
+ int32_t main() {
+   SherpaOnnxSpeakerEmbeddingExtractorConfig config;
+ 
+   memset(&config, 0, sizeof(config));
+ 
+   // please download the model from
+   // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+   config.model = "./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx";
+ 
+   config.num_threads = 1;
+   config.debug = 0;
+   config.provider = "cpu";
+ 
+   const SherpaOnnxSpeakerEmbeddingExtractor *ex =
+       SherpaOnnxCreateSpeakerEmbeddingExtractor(&config);
+   if (!ex) {
+     fprintf(stderr, "Failed to create speaker embedding extractor");
+     return -1;
+   }
+ 
+   int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(ex);
+ 
+   const SherpaOnnxSpeakerEmbeddingManager *manager =
+       SherpaOnnxCreateSpeakerEmbeddingManager(dim);
+ 
+   // Please download the test data from
+   // https://github.com/csukuangfj/sr-data
+   const char *spk1_1 = "./sr-data/enroll/fangjun-sr-1.wav";
+   const char *spk1_2 = "./sr-data/enroll/fangjun-sr-2.wav";
+   const char *spk1_3 = "./sr-data/enroll/fangjun-sr-3.wav";
+ 
+   const char *spk2_1 = "./sr-data/enroll/leijun-sr-1.wav";
+   const char *spk2_2 = "./sr-data/enroll/leijun-sr-2.wav";
+ 
+   const float *spk1_vec[4] = {NULL};
+   spk1_vec[0] = ComputeEmbedding(ex, spk1_1);
+   spk1_vec[1] = ComputeEmbedding(ex, spk1_2);
+   spk1_vec[2] = ComputeEmbedding(ex, spk1_3);
+ 
+   const float *spk2_vec[3] = {NULL};
+   spk2_vec[0] = ComputeEmbedding(ex, spk2_1);
+   spk2_vec[1] = ComputeEmbedding(ex, spk2_2);
+ 
+   if (!SherpaOnnxSpeakerEmbeddingManagerAddList(manager, "fangjun", spk1_vec)) {
+     fprintf(stderr, "Failed to register fangjun\n");
+     exit(-1);
+   }
+ 
+   if (!SherpaOnnxSpeakerEmbeddingManagerContains(manager, "fangjun")) {
+     fprintf(stderr, "Failed to find fangjun\n");
+     exit(-1);
+   }
+ 
+   if (!SherpaOnnxSpeakerEmbeddingManagerAddList(manager, "leijun", spk2_vec)) {
+     fprintf(stderr, "Failed to register leijun\n");
+     exit(-1);
+   }
+ 
+   if (!SherpaOnnxSpeakerEmbeddingManagerContains(manager, "leijun")) {
+     fprintf(stderr, "Failed to find leijun\n");
+     exit(-1);
+   }
+ 
+   if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 2) {
+     fprintf(stderr, "There should be two speakers: fangjun and leijun\n");
+     exit(-1);
+   }
+ 
+   const char *const *all_speakers =
+       SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);
+   const char *const *p = all_speakers;
+   fprintf(stderr, "list of registered speakers\n-----\n");
+   while (p[0]) {
+     fprintf(stderr, "speaker: %s\n", p[0]);
+     ++p;
+   }
+   fprintf(stderr, "----\n");
+ 
+   SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers);
+ 
+   const char *test1 = "./sr-data/test/fangjun-test-sr-1.wav";
+   const char *test2 = "./sr-data/test/leijun-test-sr-1.wav";
+   const char *test3 = "./sr-data/test/liudehua-test-sr-1.wav";
+ 
+   const float *v1 = ComputeEmbedding(ex, test1);
+   const float *v2 = ComputeEmbedding(ex, test2);
+   const float *v3 = ComputeEmbedding(ex, test3);
+ 
+   float threshold = 0.6;
+ 
+   const char *name1 =
+       SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v1, threshold);
+   if (name1) {
+     fprintf(stderr, "%s: Found %s\n", test1, name1);
+     SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name1);
+   } else {
+     fprintf(stderr, "%s: Not found\n", test1);
+   }
+ 
+   const char *name2 =
+       SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v2, threshold);
+   if (name2) {
+     fprintf(stderr, "%s: Found %s\n", test2, name2);
+     SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name2);
+   } else {
+     fprintf(stderr, "%s: Not found\n", test2);
+   }
+ 
+   const char *name3 =
+       SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v3, threshold);
+   if (name3) {
+     fprintf(stderr, "%s: Found %s\n", test3, name3);
+     SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name3);
+   } else {
+     fprintf(stderr, "%s: Not found\n", test3);
+   }
+ 
+   int32_t ok = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, "fangjun", v1,
+                                                        threshold);
+   if (ok) {
+     fprintf(stderr, "%s matches fangjun\n", test1);
+   } else {
+     fprintf(stderr, "%s does NOT match fangjun\n", test1);
+   }
+ 
+   ok = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, "fangjun", v2,
+                                                threshold);
+   if (ok) {
+     fprintf(stderr, "%s matches fangjun\n", test2);
+   } else {
+     fprintf(stderr, "%s does NOT match fangjun\n", test2);
+   }
+ 
+   fprintf(stderr, "Removing fangjun\n");
+   if (!SherpaOnnxSpeakerEmbeddingManagerRemove(manager, "fangjun")) {
+     fprintf(stderr, "Failed to remove fangjun\n");
+     exit(-1);
+   }
+ 
+   if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 1) {
+     fprintf(stderr, "There should be only 1 speaker left\n");
+     exit(-1);
+   }
+ 
+   name1 = SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v1, threshold);
+   if (name1) {
+     fprintf(stderr, "%s: Found %s\n", test1, name1);
+     SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name1);
+   } else {
+     fprintf(stderr, "%s: Not found\n", test1);
+   }
+ 
+   fprintf(stderr, "Removing leijun\n");
+   if (!SherpaOnnxSpeakerEmbeddingManagerRemove(manager, "leijun")) {
+     fprintf(stderr, "Failed to remove leijun\n");
+     exit(-1);
+   }
+ 
+   if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 0) {
+     fprintf(stderr, "There should be only 1 speaker left\n");
+     exit(-1);
+   }
+ 
+   name2 = SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v2, threshold);
+   if (name2) {
+     fprintf(stderr, "%s: Found %s\n", test2, name2);
+     SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name2);
+   } else {
+     fprintf(stderr, "%s: Not found\n", test2);
+   }
+ 
+   all_speakers = SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);
+ 
+   p = all_speakers;
+   fprintf(stderr, "list of registered speakers\n-----\n");
+   while (p[0]) {
+     fprintf(stderr, "speaker: %s\n", p[0]);
+     ++p;
+   }
+   fprintf(stderr, "----\n");
+ 
+   SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers);
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v1);
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v2);
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v3);
+ 
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[0]);
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[1]);
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[2]);
+ 
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[0]);
+   SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[1]);
+ 
+   SherpaOnnxDestroySpeakerEmbeddingManager(manager);
+   SherpaOnnxDestroySpeakerEmbeddingExtractor(ex);
+ 
+   return 0;
+ }
--- a/c-api-examples/spoken-language-identification-c-api.c
查看文件 @2e0bcca
+++ b/c-api-examples/spoken-language-identification-c-api.c
查看文件 @2e0bcca
+ // c-api-examples/spoken-language-identification-c-api.c
+ //
+ // Copyright (c)  2024  Xiaomi Corporation
 
 // We assume you have pre-downloaded the whisper multi-lingual models
 // from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
--- a/ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift
查看文件 @2e0bcca
+++ b/ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift
查看文件 @2e0bcca
@@ -83,7 +83,7 @@ class ViewController: UIViewController {
         // Please select one model that is best suitable for you.
         //
         // You can also modify Model.swift to add new pre-trained models from
-         // https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
+         // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
 
         // let modelConfig = getBilingualStreamZhEnZipformer20230220()
         // let modelConfig = getZhZipformer20230615()
--- a/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
查看文件 @2e0bcca
+++ b/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
查看文件 @2e0bcca
@@ -4,7 +4,7 @@
 //
 //  Created by fangjun on 2023/11/23.
 //
- // Speech-to-text with Next-gen Kaldi on iOS without Internet connection
+ // Text-to-speech with Next-gen Kaldi on iOS without Internet connection
 
 import SwiftUI
 import AVFoundation
--- a/python-api-examples/offline-tts-play.py
查看文件 @2e0bcca
+++ b/python-api-examples/offline-tts-play.py
查看文件 @2e0bcca
@@ -183,7 +183,7 @@ event = threading.Event()
 first_message_time = None
 
 
- def generated_audio_callback(samples: np.ndarray):
+ def generated_audio_callback(samples: np.ndarray, progress: float):
     """This function is called whenever max_num_sentences sentences
     have been processed.
 
--- a/sherpa-onnx/c-api/c-api.cc
查看文件 @2e0bcca
+++ b/sherpa-onnx/c-api/c-api.cc
查看文件 @2e0bcca
@@ -16,6 +16,8 @@
 #include "sherpa-onnx/csrc/macros.h"
 #include "sherpa-onnx/csrc/offline-recognizer.h"
 #include "sherpa-onnx/csrc/online-recognizer.h"
+ #include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
+ #include "sherpa-onnx/csrc/speaker-embedding-manager.h"
 #include "sherpa-onnx/csrc/spoken-language-identification.h"
 #include "sherpa-onnx/csrc/voice-activity-detector.h"
 #include "sherpa-onnx/csrc/wave-reader.h"
@@ -114,7 +116,7 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
   return recognizer;
 }
 
- void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer) {
+ void DestroyOnlineRecognizer(const SherpaOnnxOnlineRecognizer *recognizer) {
   delete recognizer;
 }
 
@@ -132,25 +134,28 @@ SherpaOnnxOnlineStream *CreateOnlineStreamWithHotwords(
   return stream;
 }
 
- void DestroyOnlineStream(SherpaOnnxOnlineStream *stream) { delete stream; }
+ void DestroyOnlineStream(const SherpaOnnxOnlineStream *stream) {
+   delete stream;
+ }
 
- void AcceptWaveform(SherpaOnnxOnlineStream *stream, int32_t sample_rate,
+ void AcceptWaveform(const SherpaOnnxOnlineStream *stream, int32_t sample_rate,
                     const float *samples, int32_t n) {
   stream->impl->AcceptWaveform(sample_rate, samples, n);
 }
 
- int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer,
-                             SherpaOnnxOnlineStream *stream) {
+ int32_t IsOnlineStreamReady(const SherpaOnnxOnlineRecognizer *recognizer,
+                             const SherpaOnnxOnlineStream *stream) {
   return recognizer->impl->IsReady(stream->impl.get());
 }
 
- void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
-                         SherpaOnnxOnlineStream *stream) {
+ void DecodeOnlineStream(const SherpaOnnxOnlineRecognizer *recognizer,
+                         const SherpaOnnxOnlineStream *stream) {
   recognizer->impl->DecodeStream(stream->impl.get());
 }
 
- void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer,
-                                  SherpaOnnxOnlineStream **streams, int32_t n) {
+ void DecodeMultipleOnlineStreams(const SherpaOnnxOnlineRecognizer *recognizer,
+                                  const SherpaOnnxOnlineStream **streams,
+                                  int32_t n) {
   std::vector<sherpa_onnx::OnlineStream *> ss(n);
   for (int32_t i = 0; i != n; ++i) {
     ss[i] = streams[i]->impl.get();
@@ -159,7 +164,8 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer,
 }
 
 const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
-     SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream) {
+     const SherpaOnnxOnlineRecognizer *recognizer,
+     const SherpaOnnxOnlineStream *stream) {
   sherpa_onnx::OnlineRecognizerResult result =
       recognizer->impl->GetResult(stream->impl.get());
   const auto &text = result.text;
@@ -232,29 +238,30 @@ void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) {
   }
 }
 
- void Reset(SherpaOnnxOnlineRecognizer *recognizer,
-            SherpaOnnxOnlineStream *stream) {
+ void Reset(const SherpaOnnxOnlineRecognizer *recognizer,
+            const SherpaOnnxOnlineStream *stream) {
   recognizer->impl->Reset(stream->impl.get());
 }
 
- void InputFinished(SherpaOnnxOnlineStream *stream) {
+ void InputFinished(const SherpaOnnxOnlineStream *stream) {
   stream->impl->InputFinished();
 }
 
- int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer,
-                    SherpaOnnxOnlineStream *stream) {
+ int32_t IsEndpoint(const SherpaOnnxOnlineRecognizer *recognizer,
+                    const SherpaOnnxOnlineStream *stream) {
   return recognizer->impl->IsEndpoint(stream->impl.get());
 }
 
- SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line) {
+ const SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line) {
   SherpaOnnxDisplay *ans = new SherpaOnnxDisplay;
   ans->impl = std::make_unique<sherpa_onnx::Display>(max_word_per_line);
   return ans;
 }
 
- void DestroyDisplay(SherpaOnnxDisplay *display) { delete display; }
+ void DestroyDisplay(const SherpaOnnxDisplay *display) { delete display; }
 
- void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s) {
+ void SherpaOnnxPrint(const SherpaOnnxDisplay *display, int32_t idx,
+                      const char *s) {
   display->impl->Print(idx, s);
 }
 
@@ -808,9 +815,8 @@ int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) {
 }
 
 static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
-     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
-     float speed, std::function<void(const float *, int32_t, float)> callback) 
- {
+     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
+     std::function<void(const float *, int32_t, float)> callback) {
   sherpa_onnx::GeneratedAudio audio =
       tts->impl->Generate(text, sid, speed, callback);
 
@@ -833,36 +839,37 @@ static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
     float speed) {
-   return SherpaOnnxOfflineTtsGenerateInternal( tts, text, sid, speed, nullptr );
+   return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, nullptr);
 }
 
 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
     SherpaOnnxGeneratedAudioCallback callback) {
-   auto wrapper = [callback](const float *samples, int32_t n, float /*progress*/) {
-     callback(samples, n );
-   };
+   auto wrapper = [callback](const float *samples, int32_t n,
+                             float /*progress*/) { callback(samples, n); };
 
-   return SherpaOnnxOfflineTtsGenerateInternal( tts, text, sid, speed, wrapper );
+   return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
 }
 
- const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithProgressCallback(
+ const SherpaOnnxGeneratedAudio *
+ SherpaOnnxOfflineTtsGenerateWithProgressCallback(
     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
     SherpaOnnxGeneratedAudioProgressCallback callback) {
   auto wrapper = [callback](const float *samples, int32_t n, float progress) {
-     callback(samples, n, progress );
+     callback(samples, n, progress);
   };
-   return SherpaOnnxOfflineTtsGenerateInternal( tts, text, sid, speed, wrapper );
+   return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
 }
 
 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
     const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
     SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
-   auto wrapper = [callback, arg](const float *samples, int32_t n, float /*progress*/) {
+   auto wrapper = [callback, arg](const float *samples, int32_t n,
+                                  float /*progress*/) {
     callback(samples, n, arg);
   };
 
-   return SherpaOnnxOfflineTtsGenerateInternal( tts, text, sid, speed, wrapper );
+   return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
 }
 
 void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
@@ -972,3 +979,200 @@ void SherpaOnnxDestroySpokenLanguageIdentificationResult(
     delete r;
   }
 }
+ 
+ struct SherpaOnnxSpeakerEmbeddingExtractor {
+   std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl;
+ };
+ 
+ const SherpaOnnxSpeakerEmbeddingExtractor *
+ SherpaOnnxCreateSpeakerEmbeddingExtractor(
+     const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
+   sherpa_onnx::SpeakerEmbeddingExtractorConfig c;
+   c.model = SHERPA_ONNX_OR(config->model, "");
+ 
+   c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
+   c.debug = SHERPA_ONNX_OR(config->debug, 0);
+   c.provider = SHERPA_ONNX_OR(config->provider, "cpu");
+ 
+   if (config->debug) {
+     SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
+   }
+ 
+   if (!c.Validate()) {
+     SHERPA_ONNX_LOGE("Errors in config!");
+     return nullptr;
+   }
+ 
+   auto p = new SherpaOnnxSpeakerEmbeddingExtractor;
+ 
+   p->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingExtractor>(c);
+ 
+   return p;
+ }
+ 
+ void SherpaOnnxDestroySpeakerEmbeddingExtractor(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p) {
+   delete p;
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingExtractorDim(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p) {
+   return p->impl->Dim();
+ }
+ 
+ const SherpaOnnxOnlineStream *SherpaOnnxSpeakerEmbeddingExtractorCreateStream(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p) {
+   SherpaOnnxOnlineStream *stream =
+       new SherpaOnnxOnlineStream(p->impl->CreateStream());
+   return stream;
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingExtractorIsReady(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p,
+     const SherpaOnnxOnlineStream *s) {
+   return p->impl->IsReady(s->impl.get());
+ }
+ 
+ const float *SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p,
+     const SherpaOnnxOnlineStream *s) {
+   std::vector<float> v = p->impl->Compute(s->impl.get());
+   float *ans = new float[v.size()];
+   std::copy(v.begin(), v.end(), ans);
+   return ans;
+ }
+ 
+ void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(const float *v) {
+   delete[] v;
+ }
+ 
+ struct SherpaOnnxSpeakerEmbeddingManager {
+   std::unique_ptr<sherpa_onnx::SpeakerEmbeddingManager> impl;
+ };
+ 
+ const SherpaOnnxSpeakerEmbeddingManager *
+ SherpaOnnxCreateSpeakerEmbeddingManager(int32_t dim) {
+   auto p = new SherpaOnnxSpeakerEmbeddingManager;
+   p->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingManager>(dim);
+   return p;
+ }
+ 
+ void SherpaOnnxDestroySpeakerEmbeddingManager(
+     const SherpaOnnxSpeakerEmbeddingManager *p) {
+   delete p;
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingManagerAdd(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
+     const float *v) {
+   return p->impl->Add(name, v);
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingManagerAddList(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
+     const float **v) {
+   int32_t n = 0;
+   auto q = v;
+   while (q && q[0]) {
+     ++n;
+     ++q;
+   }
+ 
+   if (n == 0) {
+     SHERPA_ONNX_LOGE("Empty embedding!");
+     return 0;
+   }
+ 
+   std::vector<std::vector<float>> vec(n);
+   int32_t dim = p->impl->Dim();
+ 
+   for (int32_t i = 0; i != n; ++i) {
+     vec[i] = std::vector<float>(v[i], v[i] + dim);
+   }
+ 
+   return p->impl->Add(name, vec);
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
+     const float *v, int32_t n) {
+   std::vector<std::vector<float>> vec(n);
+ 
+   int32_t dim = p->impl->Dim();
+ 
+   for (int32_t i = 0; i != n; ++i, v += dim) {
+     vec[i] = std::vector<float>(v, v + dim);
+   }
+ 
+   return p->impl->Add(name, vec);
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingManagerRemove(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name) {
+   return p->impl->Remove(name);
+ }
+ 
+ const char *SherpaOnnxSpeakerEmbeddingManagerSearch(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const float *v,
+     float threshold) {
+   auto r = p->impl->Search(v, threshold);
+   if (r.empty()) {
+     return nullptr;
+   }
+ 
+   char *name = new char[r.size() + 1];
+   std::copy(r.begin(), r.end(), name);
+   name[r.size()] = '\0';
+ 
+   return name;
+ }
+ 
+ void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(const char *name) {
+   delete[] name;
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingManagerVerify(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
+     const float *v, float threshold) {
+   return p->impl->Verify(name, v, threshold);
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingManagerContains(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name) {
+   return p->impl->Contains(name);
+ }
+ 
+ int32_t SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(
+     const SherpaOnnxSpeakerEmbeddingManager *p) {
+   return p->impl->NumSpeakers();
+ }
+ 
+ const char *const *SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
+     const SherpaOnnxSpeakerEmbeddingManager *manager) {
+   std::vector<std::string> all_speakers = manager->impl->GetAllSpeakers();
+   int32_t num_speakers = all_speakers.size();
+   char **p = new char *[num_speakers + 1];
+   p[num_speakers] = nullptr;
+ 
+   int32_t i = 0;
+   for (const auto &name : all_speakers) {
+     p[i] = new char[name.size() + 1];
+     std::copy(name.begin(), name.end(), p[i]);
+     p[i][name.size()] = '\0';
+ 
+     i += 1;
+   }
+   return p;
+ }
+ 
+ void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
+     const char *const *names) {
+   auto p = names;
+ 
+   while (p && p[0]) {
+     delete[] p[0];
+     ++p;
+   }
+ 
+   delete[] names;
+ }
--- a/sherpa-onnx/c-api/c-api.h
查看文件 @2e0bcca
+++ b/sherpa-onnx/c-api/c-api.h
查看文件 @2e0bcca
@@ -186,7 +186,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
 ///
 /// @param p A pointer returned by CreateOnlineRecognizer()
 SHERPA_ONNX_API void DestroyOnlineRecognizer(
-     SherpaOnnxOnlineRecognizer *recognizer);
+     const SherpaOnnxOnlineRecognizer *recognizer);
 
 /// Create an online stream for accepting wave samples.
 ///
@@ -208,7 +208,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineStreamWithHotwords(
 /// Destroy an online stream.
 ///
 /// @param stream A pointer returned by CreateOnlineStream()
- SHERPA_ONNX_API void DestroyOnlineStream(SherpaOnnxOnlineStream *stream);
+ SHERPA_ONNX_API void DestroyOnlineStream(const SherpaOnnxOnlineStream *stream);
 
 /// Accept input audio samples and compute the features.
 /// The user has to invoke DecodeOnlineStream() to run the neural network and
@@ -221,7 +221,7 @@ SHERPA_ONNX_API void DestroyOnlineStream(SherpaOnnxOnlineStream *stream);
 /// @param samples A pointer to a 1-D array containing audio samples.
 ///                The range of samples has to be normalized to [-1, 1].
 /// @param n  Number of elements in the samples array.
- SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream,
+ SHERPA_ONNX_API void AcceptWaveform(const SherpaOnnxOnlineStream *stream,
                                     int32_t sample_rate, const float *samples,
                                     int32_t n);
 
@@ -230,8 +230,9 @@ SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream,
 ///
 /// @param recognizer  A pointer returned by CreateOnlineRecognizer
 /// @param stream  A pointer returned by CreateOnlineStream
- SHERPA_ONNX_API int32_t IsOnlineStreamReady(
-     SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
+ SHERPA_ONNX_API int32_t
+ IsOnlineStreamReady(const SherpaOnnxOnlineRecognizer *recognizer,
+                     const SherpaOnnxOnlineStream *stream);
 
 /// Call this function to run the neural network model and decoding.
 //
@@ -243,8 +244,9 @@ SHERPA_ONNX_API int32_t IsOnlineStreamReady(
 ///     DecodeOnlineStream(recognizer, stream);
 ///  }
 ///
- SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
-                                         SherpaOnnxOnlineStream *stream);
+ SHERPA_ONNX_API void DecodeOnlineStream(
+     const SherpaOnnxOnlineRecognizer *recognizer,
+     const SherpaOnnxOnlineStream *stream);
 
 /// This function is similar to DecodeOnlineStream(). It decodes multiple
 /// OnlineStream in parallel.
@@ -257,8 +259,8 @@ SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
 ///                 CreateOnlineRecognizer()
 /// @param n  Number of elements in the given streams array.
 SHERPA_ONNX_API void DecodeMultipleOnlineStreams(
-     SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream **streams,
-     int32_t n);
+     const SherpaOnnxOnlineRecognizer *recognizer,
+     const SherpaOnnxOnlineStream **streams, int32_t n);
 
 /// Get the decoding results so far for an OnlineStream.
 ///
@@ -268,7 +270,8 @@ SHERPA_ONNX_API void DecodeMultipleOnlineStreams(
 ///         DestroyOnlineRecognizerResult() to free the returned pointer to
 ///         avoid memory leak.
 SHERPA_ONNX_API const SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
-     SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream);
+     const SherpaOnnxOnlineRecognizer *recognizer,
+     const SherpaOnnxOnlineStream *stream);
 
 /// Destroy the pointer returned by GetOnlineStreamResult().
 ///
@@ -281,35 +284,36 @@ SHERPA_ONNX_API void DestroyOnlineRecognizerResult(
 ///
 /// @param recognizer A pointer returned by CreateOnlineRecognizer().
 /// @param stream A pointer returned by CreateOnlineStream
- SHERPA_ONNX_API void Reset(SherpaOnnxOnlineRecognizer *recognizer,
-                            SherpaOnnxOnlineStream *stream);
+ SHERPA_ONNX_API void Reset(const SherpaOnnxOnlineRecognizer *recognizer,
+                            const SherpaOnnxOnlineStream *stream);
 
 /// Signal that no more audio samples would be available.
 /// After this call, you cannot call AcceptWaveform() any more.
 ///
 /// @param stream A pointer returned by CreateOnlineStream()
- SHERPA_ONNX_API void InputFinished(SherpaOnnxOnlineStream *stream);
+ SHERPA_ONNX_API void InputFinished(const SherpaOnnxOnlineStream *stream);
 
 /// Return 1 if an endpoint has been detected.
 ///
 /// @param recognizer A pointer returned by CreateOnlineRecognizer()
 /// @param stream A pointer returned by CreateOnlineStream()
 /// @return Return 1 if an endpoint is detected. Return 0 otherwise.
- SHERPA_ONNX_API int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer,
-                                    SherpaOnnxOnlineStream *stream);
+ SHERPA_ONNX_API int32_t IsEndpoint(const SherpaOnnxOnlineRecognizer *recognizer,
+                                    const SherpaOnnxOnlineStream *stream);
 
 // for displaying results on Linux/macOS.
 SHERPA_ONNX_API typedef struct SherpaOnnxDisplay SherpaOnnxDisplay;
 
 /// Create a display object. Must be freed using DestroyDisplay to avoid
 /// memory leak.
- SHERPA_ONNX_API SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line);
+ SHERPA_ONNX_API const SherpaOnnxDisplay *CreateDisplay(
+     int32_t max_word_per_line);
 
- SHERPA_ONNX_API void DestroyDisplay(SherpaOnnxDisplay *display);
+ SHERPA_ONNX_API void DestroyDisplay(const SherpaOnnxDisplay *display);
 
 /// Print the result.
- SHERPA_ONNX_API void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx,
-                                      const char *s);
+ SHERPA_ONNX_API void SherpaOnnxPrint(const SherpaOnnxDisplay *display,
+                                      int32_t idx, const char *s);
 // ============================================================
 // For offline ASR (i.e., non-streaming ASR)
 // ============================================================
@@ -769,7 +773,7 @@ typedef void (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
                                                         int32_t n, void *arg);
 
 typedef void (*SherpaOnnxGeneratedAudioProgressCallback)(const float *samples,
-                                                  int32_t n, float p);
+                                                          int32_t n, float p);
 
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
 
@@ -839,7 +843,9 @@ SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);
 
 SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave);
 
- // Spoken language identification
+ // ============================================================
+ // For spoken language identification
+ // ============================================================
 
 SHERPA_ONNX_API typedef struct
     SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
@@ -893,6 +899,169 @@ SherpaOnnxSpokenLanguageIdentificationCompute(
 SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult(
     const SherpaOnnxSpokenLanguageIdentificationResult *r);
 
+ // ============================================================
+ // For speaker embedding extraction
+ // ============================================================
+ SHERPA_ONNX_API typedef struct SherpaOnnxSpeakerEmbeddingExtractorConfig {
+   const char *model;
+   int32_t num_threads;
+   int32_t debug;
+   const char *provider;
+ } SherpaOnnxSpeakerEmbeddingExtractorConfig;
+ 
+ SHERPA_ONNX_API typedef struct SherpaOnnxSpeakerEmbeddingExtractor
+     SherpaOnnxSpeakerEmbeddingExtractor;
+ 
+ // The user has to invoke SherpaOnnxDestroySpeakerEmbeddingExtractor()
+ // to free the returned pointer to avoid memory leak
+ SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
+ SherpaOnnxCreateSpeakerEmbeddingExtractor(
+     const SherpaOnnxSpeakerEmbeddingExtractorConfig *config);
+ 
+ SHERPA_ONNX_API void SherpaOnnxDestroySpeakerEmbeddingExtractor(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p);
+ 
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingExtractorDim(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p);
+ 
+ // The user has to invoke DestroyOnlineStream() to free the returned pointer
+ // to avoid memory leak
+ SHERPA_ONNX_API const SherpaOnnxOnlineStream *
+ SherpaOnnxSpeakerEmbeddingExtractorCreateStream(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p);
+ 
+ // Return 1 if the stream has enough feature frames for computing embeddings.
+ // Return 0 otherwise.
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingExtractorIsReady(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p,
+     const SherpaOnnxOnlineStream *s);
+ 
+ // Compute the embedding of the stream.
+ //
+ // @return Return a pointer pointing to an array containing the embedding.
+ // The length of the array is `dim` as returned by
+ // SherpaOnnxSpeakerEmbeddingExtractorDim(p)
+ //
+ // The user has to invoke SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding()
+ // to free the returned pointer to avoid memory leak.
+ SHERPA_ONNX_API const float *
+ SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(
+     const SherpaOnnxSpeakerEmbeddingExtractor *p,
+     const SherpaOnnxOnlineStream *s);
+ 
+ SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(
+     const float *v);
+ 
+ SHERPA_ONNX_API typedef struct SherpaOnnxSpeakerEmbeddingManager
+     SherpaOnnxSpeakerEmbeddingManager;
+ 
+ // The user has to invoke SherpaOnnxDestroySpeakerEmbeddingManager()
+ // to free the returned pointer to avoid memory leak
+ SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingManager *
+ SherpaOnnxCreateSpeakerEmbeddingManager(int32_t dim);
+ 
+ SHERPA_ONNX_API void SherpaOnnxDestroySpeakerEmbeddingManager(
+     const SherpaOnnxSpeakerEmbeddingManager *p);
+ 
+ // Register the embedding of a user
+ //
+ // @param name  The name of the user
+ // @param p Pointer to an array containing the embeddings. The length of the
+ //          array must be equal to `dim` used to construct the manager `p`.
+ //
+ // @return Return 1 if added successfully. Return 0 on error
+ SHERPA_ONNX_API int32_t
+ SherpaOnnxSpeakerEmbeddingManagerAdd(const SherpaOnnxSpeakerEmbeddingManager *p,
+                                      const char *name, const float *v);
+ 
+ // @param v Pointer to an array of embeddings. If there are n embeddings, then
+ //          v[0] is the pointer to the 0-th array containing the embeddings
+ //          v[1] is the pointer to the 1-st array containing the embeddings
+ //          v[n-1] is the pointer to the last array containing the embeddings
+ //          v[n] is a NULL pointer
+ // @return Return 1 if added successfully. Return 0 on error
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerAddList(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
+     const float **v);
+ 
+ // Similar to SherpaOnnxSpeakerEmbeddingManagerAddList() but the memory
+ // is flattened.
+ //
+ // The length of the input array should be `n * dim`.
+ //
+ // @return Return 1 if added successfully. Return 0 on error
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
+     const float *v, int32_t n);
+ 
+ // Remove a user.
+ // @param naem The name of the user to remove.
+ // @return Return 1 if removed successfully; return 0 on error.
+ //
+ // Note if the user does not exist, it also returns 0.
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerRemove(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name);
+ 
+ // Search if an existing users' embedding matches the given one.
+ //
+ // @param p Pointer to an array containing the embedding. The dim
+ //          of the array must equal to `dim` used to construct the manager `p`.
+ // @param threshold A value between 0 and 1. If the similarity score exceeds
+ //                  this threshold, we say a match is found.
+ // @return Returns the name of the user if found. Return NULL if not found.
+ //         If not NULL, the caller has to invoke
+ //          SherpaOnnxSpeakerEmbeddingManagerFreeSearch() to free the returned
+ //          pointer to avoid memory leak.
+ SHERPA_ONNX_API const char *SherpaOnnxSpeakerEmbeddingManagerSearch(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const float *v,
+     float threshold);
+ 
+ SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeSearch(
+     const char *name);
+ 
+ // Check whether the input embedding matches the embedding of the input
+ // speaker.
+ //
+ // It is for speaker verification.
+ //
+ // @param name The target speaker name.
+ // @param p The input embedding to check.
+ // @param threshold A value between 0 and 1.
+ // @return Return 1 if it matches. Otherwise, it returns 0.
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerVerify(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
+     const float *v, float threshold);
+ 
+ // Return 1 if the user with the name is in the manager.
+ // Return 0 if the user does not exist.
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerContains(
+     const SherpaOnnxSpeakerEmbeddingManager *p, const char *name);
+ 
+ // Return number of speakers in the manager.
+ SHERPA_ONNX_API int32_t SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(
+     const SherpaOnnxSpeakerEmbeddingManager *p);
+ 
+ // Return the name of all speakers in the manager.
+ //
+ // @return Return an array of pointers `ans`. If there are n speakers, then
+ // - ans[0] contains the name of the 0-th speaker
+ // - ans[1] contains the name of the 1-st speaker
+ // - ans[n-1] contains the name of the last speaker
+ // - ans[n] is NULL
+ // If there are no users at all, then ans[0] is NULL. In any case,
+ // `ans` is not NULL.
+ //
+ // Each name is NULL-terminated
+ //
+ // The caller has to invoke SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers()
+ // to free the returned pointer to avoid memory leak.
+ SHERPA_ONNX_API const char *const *
+ SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
+     const SherpaOnnxSpeakerEmbeddingManager *p);
+ 
+ SHERPA_ONNX_API void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
+     const char *const *names);
+ 
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
 #endif
--- a/sherpa-onnx/csrc/offline-tts-vits-impl.h
查看文件 @2e0bcca
+++ b/sherpa-onnx/csrc/offline-tts-vits-impl.h
查看文件 @2e0bcca
@@ -168,7 +168,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
       ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                          audio.samples.end());
       if (callback) {
-         callback(audio.samples.data(), audio.samples.size(), b * 1.0 / num_batches);
+         callback(audio.samples.data(), audio.samples.size(),
+                  b * 1.0 / num_batches);
         // Caution(fangjun): audio is freed when the callback returns, so users
         // should copy the data if they want to access the data after
         // the callback returns to avoid segmentation fault.
--- a/sherpa-onnx/csrc/offline-tts.h
查看文件 @2e0bcca
+++ b/sherpa-onnx/csrc/offline-tts.h
查看文件 @2e0bcca
@@ -54,8 +54,8 @@ struct GeneratedAudio {
 
 class OfflineTtsImpl;
 
- using GeneratedAudioCallback =
-     std::function<void(const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;
+ using GeneratedAudioCallback = std::function<void(
+     const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;
 
 class OfflineTts {
  public:
--- a/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
查看文件 @2e0bcca
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
查看文件 @2e0bcca
@@ -44,7 +44,8 @@ static void Handler(int32_t /*sig*/) {
   fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
 }
 
- static void AudioGeneratedCallback(const float *s, int32_t n) {
+ static void AudioGeneratedCallback(const float *s, int32_t n,
+                                    float /*progress*/) {
   if (n > 0) {
     std::lock_guard<std::mutex> lock(g_buffer.mutex);
     g_buffer.samples.push({s, s + n});
--- a/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
查看文件 @2e0bcca
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
查看文件 @2e0bcca
@@ -47,7 +47,8 @@ static void Handler(int32_t /*sig*/) {
   fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
 }
 
- static void AudioGeneratedCallback(const float *s, int32_t n, float /*progress*/) {
+ static void AudioGeneratedCallback(const float *s, int32_t n,
+                                    float /*progress*/) {
   if (n > 0) {
     Samples samples;
     samples.data = std::vector<float>{s, s + n};
--- a/sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
查看文件 @2e0bcca
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
查看文件 @2e0bcca
@@ -9,9 +9,8 @@
 #include "sherpa-onnx/csrc/parse-options.h"
 #include "sherpa-onnx/csrc/wave-writer.h"
 
- void audioCallback(const float *samples, int32_t n, float progress)
- {
- 	printf( "sample=%d, progress=%f\n", n, progress );
+ void audioCallback(const float *samples, int32_t n, float progress) {
+   printf("sample=%d, progress=%f\n", n, progress);
 }
 
 int main(int32_t argc, char *argv[]) {
--- a/sherpa-onnx/csrc/speaker-embedding-manager.cc
查看文件 @2e0bcca
+++ b/sherpa-onnx/csrc/speaker-embedding-manager.cc
查看文件 @2e0bcca
@@ -93,7 +93,7 @@ class SpeakerEmbeddingManager::Impl {
     int32_t num_rows = embedding_matrix_.rows();
 
     if (row_idx < num_rows - 1) {
-       embedding_matrix_.block(row_idx, 0, num_rows - -1 - row_idx, dim_) =
+       embedding_matrix_.block(row_idx, 0, num_rows - 1 - row_idx, dim_) =
           embedding_matrix_.bottomRows(num_rows - 1 - row_idx);
     }
 
--- a/sherpa-onnx/jni/jni.cc
查看文件 @2e0bcca
+++ b/sherpa-onnx/jni/jni.cc
查看文件 @2e0bcca
@@ -795,9 +795,10 @@ class SherpaOnnxOfflineTts {
   explicit SherpaOnnxOfflineTts(const OfflineTtsConfig &config)
       : tts_(config) {}
 
-   GeneratedAudio Generate(
-       const std::string &text, int64_t sid = 0, float speed = 1.0,
-       std::function<void(const float *, int32_t, float)> callback = nullptr) const {
+   GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
+                           float speed = 1.0,
+                           std::function<void(const float *, int32_t, float)>
+                               callback = nullptr) const {
     return tts_.Generate(text, sid, speed, callback);
   }
 
--- a/sherpa-onnx/python/csrc/offline-tts.cc
查看文件 @2e0bcca
+++ b/sherpa-onnx/python/csrc/offline-tts.cc
查看文件 @2e0bcca
@@ -55,14 +55,16 @@ void PybindOfflineTts(py::module *m) {
       .def(
           "generate",
           [](const PyClass &self, const std::string &text, int64_t sid,
-              float speed, std::function<void(py::array_t<float>, float)> callback)
+              float speed,
+              std::function<void(py::array_t<float>, float)> callback)
               -> GeneratedAudio {
             if (!callback) {
               return self.Generate(text, sid, speed);
             }
 
-             std::function<void(const float *, int32_t, float)> callback_wrapper =
-                 [callback](const float *samples, int32_t n, float progress) {
+             std::function<void(const float *, int32_t, float)>
+                 callback_wrapper = [callback](const float *samples, int32_t n,
+                                               float progress) {
                   // CAUTION(fangjun): we have to copy samples since it is
                   // freed once the call back returns.