Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-03-28 18:05:40 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-03-28 18:05:40 +0800
Commit
2e0bccad36502c7a985d84c88feef4ba4e8cd404
2e0bccad
1 parent
638f48f4
Add C API for speaker embedding extractor. (#711)
隐藏空白字符变更
内嵌
并排对比
正在显示
23 个修改的文件
包含
739 行增加
和
80 行删除
.github/scripts/test-c-api.sh
.github/workflows/linux.yaml
.github/workflows/macos.yaml
.github/workflows/windows-x64.yaml
.gitignore
c-api-examples/CMakeLists.txt
c-api-examples/asr-microphone-example/c-api-alsa.cc
c-api-examples/decode-file-c-api.c
c-api-examples/speaker-identification-c-api.c
c-api-examples/spoken-language-identification-c-api.c
ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
python-api-examples/offline-tts-play.py
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/offline-tts-vits-impl.h
sherpa-onnx/csrc/offline-tts.h
sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
sherpa-onnx/csrc/speaker-embedding-manager.cc
sherpa-onnx/jni/jni.cc
sherpa-onnx/python/csrc/offline-tts.cc
.github/scripts/test-c-api.sh
查看文件 @
2e0bcca
#!/usr/bin/env bash
set
-e
set
-e
x
log
()
{
# This function is from espnet
...
...
@@ -9,6 +9,7 @@ log() {
}
echo
"SLID_EXE is
$SLID_EXE
"
echo
"SID_EXE is
$SID_EXE
"
echo
"PATH:
$PATH
"
...
...
@@ -24,3 +25,15 @@ rm sherpa-onnx-whisper-tiny.tar.bz2
$SLID_EXE
rm -rf sherpa-onnx-whisper-tiny
*
log
"------------------------------------------------------------"
log
"Download file for speaker identification and verification "
log
"------------------------------------------------------------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
git clone https://github.com/csukuangfj/sr-data
$SID_EXE
rm -fv
*
.onnx
rm -rf sr-data
...
...
.github/workflows/linux.yaml
查看文件 @
2e0bcca
...
...
@@ -124,11 +124,12 @@ jobs:
name
:
release-${{ matrix.build_type }}-with-shared-lib-${{ matrix.shared_lib }}-with-tts-${{ matrix.with_tts }}
path
:
build/bin/*
-
name
:
Test
spoken language identification (C API)
-
name
:
Test
C API
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
.github/scripts/test-c-api.sh
...
...
.github/workflows/macos.yaml
查看文件 @
2e0bcca
...
...
@@ -103,11 +103,12 @@ jobs:
otool -L build/bin/sherpa-onnx
otool -l build/bin/sherpa-onnx
-
name
:
Test
spoken language identification (C API)
-
name
:
Test
C API
shell
:
bash
run
:
|
export PATH=$PWD/build/bin:$PATH
export SLID_EXE=spoken-language-identification-c-api
export SID_EXE=speaker-identification-c-api
.github/scripts/test-c-api.sh
...
...
.github/workflows/windows-x64.yaml
查看文件 @
2e0bcca
...
...
@@ -70,11 +70,12 @@ jobs:
ls -lh ./bin/Release/sherpa-onnx.exe
-
name
:
Test
spoken language identification (C API)
-
name
:
Test
C API
shell
:
bash
run
:
|
export PATH=$PWD/build/bin/Release:$PATH
export SLID_EXE=spoken-language-identification-c-api.exe
export SID_EXE=speaker-identification-c-api.exe
.github/scripts/test-c-api.sh
...
...
.gitignore
查看文件 @
2e0bcca
...
...
@@ -87,3 +87,4 @@ vits-coqui-*
vits-mms-*
*.tar.bz2
sherpa-onnx-paraformer-trilingual-zh-cantonese-en
sr-data
...
...
c-api-examples/CMakeLists.txt
查看文件 @
2e0bcca
...
...
@@ -12,6 +12,9 @@ endif()
add_executable
(
spoken-language-identification-c-api spoken-language-identification-c-api.c
)
target_link_libraries
(
spoken-language-identification-c-api sherpa-onnx-c-api
)
add_executable
(
speaker-identification-c-api speaker-identification-c-api.c
)
target_link_libraries
(
speaker-identification-c-api sherpa-onnx-c-api
)
if
(
SHERPA_ONNX_HAS_ALSA
)
add_subdirectory
(
./asr-microphone-example
)
elseif
((
UNIX AND NOT APPLE
)
OR LINUX
)
...
...
c-api-examples/asr-microphone-example/c-api-alsa.cc
查看文件 @
2e0bcca
...
...
@@ -188,10 +188,11 @@ int32_t main(int32_t argc, char *argv[]) {
}
}
SherpaOnnxOnlineRecognizer
*
recognizer
=
CreateOnlineRecognizer
(
&
config
);
SherpaOnnxOnlineStream
*
stream
=
CreateOnlineStream
(
recognizer
);
const
SherpaOnnxOnlineRecognizer
*
recognizer
=
CreateOnlineRecognizer
(
&
config
);
const
SherpaOnnxOnlineStream
*
stream
=
CreateOnlineStream
(
recognizer
);
SherpaOnnxDisplay
*
display
=
CreateDisplay
(
50
);
const
SherpaOnnxDisplay
*
display
=
CreateDisplay
(
50
);
int32_t
segment_id
=
0
;
const
char
*
device_name
=
argv
[
context
.
index
];
...
...
c-api-examples/decode-file-c-api.c
查看文件 @
2e0bcca
...
...
@@ -162,10 +162,11 @@ int32_t main(int32_t argc, char *argv[]) {
}
}
SherpaOnnxOnlineRecognizer
*
recognizer
=
CreateOnlineRecognizer
(
&
config
);
SherpaOnnxOnlineStream
*
stream
=
CreateOnlineStream
(
recognizer
);
const
SherpaOnnxOnlineRecognizer
*
recognizer
=
CreateOnlineRecognizer
(
&
config
);
const
SherpaOnnxOnlineStream
*
stream
=
CreateOnlineStream
(
recognizer
);
SherpaOnnxDisplay
*
display
=
CreateDisplay
(
50
);
const
SherpaOnnxDisplay
*
display
=
CreateDisplay
(
50
);
int32_t
segment_id
=
0
;
const
char
*
wav_filename
=
argv
[
context
.
index
];
...
...
c-api-examples/speaker-identification-c-api.c
0 → 100644
查看文件 @
2e0bcca
// c-api-examples/speaker-identification-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation
// We assume you have pre-downloaded the speaker embedding extractor model
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
//
// An example command to download
// "3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
// is given below:
//
// clang-format off
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
//
// clang-format on
//
// Also, please download the test wave files from
//
// https://github.com/csukuangfj/sr-data
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
// Reads `wav_filename`, feeds all of its samples to a new stream created from
// `ex`, and returns the speaker embedding computed for that stream.
//
// The process exits with -1 when the wave file cannot be read or when the
// extractor reports that the stream is not ready after all input was given.
//
// Ownership of the returned pointer passes to the caller, who must release it
// with SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding().
static const float *ComputeEmbedding(
    const SherpaOnnxSpeakerEmbeddingExtractor *ex, const char *wav_filename) {
  const SherpaOnnxWave *audio = SherpaOnnxReadWave(wav_filename);
  if (!audio) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    exit(-1);
  }

  const SherpaOnnxOnlineStream *s =
      SherpaOnnxSpeakerEmbeddingExtractorCreateStream(ex);

  // Push the whole file at once and signal that no more samples will follow.
  AcceptWaveform(s, audio->sample_rate, audio->samples, audio->num_samples);
  InputFinished(s);

  if (SherpaOnnxSpeakerEmbeddingExtractorIsReady(ex, s) == 0) {
    fprintf(stderr, "The input wave file %s is too short!\n", wav_filename);
    exit(-1);
  }

  // The embedding outlives this function; everything else is released here.
  const float *embedding =
      SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(ex, s);

  DestroyOnlineStream(s);
  SherpaOnnxFreeWave(audio);

  // Remember to free the returned embedding to avoid a memory leak.
  return embedding;
}
// Prints the names of all speakers currently registered in `manager`.
static void PrintAllSpeakers(const SherpaOnnxSpeakerEmbeddingManager *manager) {
  const char *const *all_speakers =
      SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager);

  fprintf(stderr, "list of registered speakers\n-----\n");
  // The returned array is terminated by a NULL entry.
  for (const char *const *p = all_speakers; p[0]; ++p) {
    fprintf(stderr, "speaker: %s\n", p[0]);
  }
  fprintf(stderr, "----\n");

  SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speakers);
}

// Registers `name` with the NULL-terminated list `embeddings` and checks that
// the manager now contains it; exits with -1 on failure.
static void EnrollSpeaker(const SherpaOnnxSpeakerEmbeddingManager *manager,
                          const char *name, const float **embeddings) {
  if (!SherpaOnnxSpeakerEmbeddingManagerAddList(manager, name, embeddings)) {
    fprintf(stderr, "Failed to register %s\n", name);
    exit(-1);
  }

  if (!SherpaOnnxSpeakerEmbeddingManagerContains(manager, name)) {
    fprintf(stderr, "Failed to find %s\n", name);
    exit(-1);
  }
}

// Removes speaker `name` from `manager`; exits with -1 on failure.
static void RemoveSpeaker(const SherpaOnnxSpeakerEmbeddingManager *manager,
                          const char *name) {
  fprintf(stderr, "Removing %s\n", name);
  if (!SherpaOnnxSpeakerEmbeddingManagerRemove(manager, name)) {
    fprintf(stderr, "Failed to remove %s\n", name);
    exit(-1);
  }
}

// Searches `manager` for a speaker matching embedding `v` and reports the
// outcome for `wav_filename`.
static void SearchAndReport(const SherpaOnnxSpeakerEmbeddingManager *manager,
                            const char *wav_filename, const float *v,
                            float threshold) {
  const char *name =
      SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v, threshold);
  if (name) {
    fprintf(stderr, "%s: Found %s\n", wav_filename, name);
    // The returned name is heap-allocated by the C API and must be freed.
    SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name);
  } else {
    fprintf(stderr, "%s: Not found\n", wav_filename);
  }
}

// Verifies embedding `v` against the registered speaker `name` and reports
// whether `wav_filename` matches.
static void VerifyAndReport(const SherpaOnnxSpeakerEmbeddingManager *manager,
                            const char *name, const char *wav_filename,
                            const float *v, float threshold) {
  if (SherpaOnnxSpeakerEmbeddingManagerVerify(manager, name, v, threshold)) {
    fprintf(stderr, "%s matches %s\n", wav_filename, name);
  } else {
    fprintf(stderr, "%s does NOT match %s\n", wav_filename, name);
  }
}

// Demonstrates the speaker identification C API: enrolls two speakers from
// several utterances each, then searches, verifies, and removes them.
//
// Returns 0 on success. Returns -1 (or exits with -1) as soon as any step
// fails.
int32_t main() {
  SherpaOnnxSpeakerEmbeddingExtractorConfig config;

  memset(&config, 0, sizeof(config));

  // please download the model from
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  config.model = "./3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx";

  config.num_threads = 1;
  config.debug = 0;
  config.provider = "cpu";

  const SherpaOnnxSpeakerEmbeddingExtractor *ex =
      SherpaOnnxCreateSpeakerEmbeddingExtractor(&config);
  if (!ex) {
    fprintf(stderr, "Failed to create speaker embedding extractor\n");
    return -1;
  }

  int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(ex);

  const SherpaOnnxSpeakerEmbeddingManager *manager =
      SherpaOnnxCreateSpeakerEmbeddingManager(dim);

  // Please download the test data from
  // https://github.com/csukuangfj/sr-data
  const char *spk1_1 = "./sr-data/enroll/fangjun-sr-1.wav";
  const char *spk1_2 = "./sr-data/enroll/fangjun-sr-2.wav";
  const char *spk1_3 = "./sr-data/enroll/fangjun-sr-3.wav";

  const char *spk2_1 = "./sr-data/enroll/leijun-sr-1.wav";
  const char *spk2_2 = "./sr-data/enroll/leijun-sr-2.wav";

  // One extra slot per array: the trailing NULL marks the end of the list
  // for SherpaOnnxSpeakerEmbeddingManagerAddList().
  const float *spk1_vec[4] = {NULL};
  spk1_vec[0] = ComputeEmbedding(ex, spk1_1);
  spk1_vec[1] = ComputeEmbedding(ex, spk1_2);
  spk1_vec[2] = ComputeEmbedding(ex, spk1_3);

  const float *spk2_vec[3] = {NULL};
  spk2_vec[0] = ComputeEmbedding(ex, spk2_1);
  spk2_vec[1] = ComputeEmbedding(ex, spk2_2);

  EnrollSpeaker(manager, "fangjun", spk1_vec);
  EnrollSpeaker(manager, "leijun", spk2_vec);

  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 2) {
    fprintf(stderr, "There should be two speakers: fangjun and leijun\n");
    exit(-1);
  }

  PrintAllSpeakers(manager);

  const char *test1 = "./sr-data/test/fangjun-test-sr-1.wav";
  const char *test2 = "./sr-data/test/leijun-test-sr-1.wav";
  const char *test3 = "./sr-data/test/liudehua-test-sr-1.wav";

  const float *v1 = ComputeEmbedding(ex, test1);
  const float *v2 = ComputeEmbedding(ex, test2);
  const float *v3 = ComputeEmbedding(ex, test3);

  float threshold = 0.6f;

  SearchAndReport(manager, test1, v1, threshold);
  SearchAndReport(manager, test2, v2, threshold);
  SearchAndReport(manager, test3, v3, threshold);

  VerifyAndReport(manager, "fangjun", test1, v1, threshold);
  VerifyAndReport(manager, "fangjun", test2, v2, threshold);

  RemoveSpeaker(manager, "fangjun");
  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 1) {
    fprintf(stderr, "There should be only 1 speaker left\n");
    exit(-1);
  }

  // fangjun has been removed, so this search shows the updated result.
  SearchAndReport(manager, test1, v1, threshold);

  RemoveSpeaker(manager, "leijun");
  if (SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager) != 0) {
    fprintf(stderr, "There should be no speakers left\n");
    exit(-1);
  }

  SearchAndReport(manager, test2, v2, threshold);

  PrintAllSpeakers(manager);

  // Every embedding returned by ComputeEmbedding() is owned by us.
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v1);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v2);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(v3);

  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[0]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[1]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk1_vec[2]);

  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[0]);
  SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding(spk2_vec[1]);

  SherpaOnnxDestroySpeakerEmbeddingManager(manager);
  SherpaOnnxDestroySpeakerEmbeddingExtractor(ex);

  return 0;
}
...
...
c-api-examples/spoken-language-identification-c-api.c
查看文件 @
2e0bcca
// c-api-examples/spoken-language-identification-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation
// We assume you have pre-downloaded the whisper multi-lingual models
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
...
...
ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift
查看文件 @
2e0bcca
...
...
@@ -83,7 +83,7 @@ class ViewController: UIViewController {
// Please select one model that is best suitable for you.
//
// You can also modify Model.swift to add new pre-trained models from
// https://k2-fsa.github.io/sherpa/
ncnn
/pretrained_models/index.html
// https://k2-fsa.github.io/sherpa/
onnx
/pretrained_models/index.html
// let modelConfig = getBilingualStreamZhEnZipformer20230220()
// let modelConfig = getZhZipformer20230615()
...
...
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
查看文件 @
2e0bcca
...
...
@@ -4,7 +4,7 @@
//
// Created by fangjun on 2023/11/23.
//
//
Speech-to-text
with Next-gen Kaldi on iOS without Internet connection
//
Text-to-speech
with Next-gen Kaldi on iOS without Internet connection
import
SwiftUI
import
AVFoundation
...
...
python-api-examples/offline-tts-play.py
查看文件 @
2e0bcca
...
...
@@ -183,7 +183,7 @@ event = threading.Event()
first_message_time
=
None
def
generated_audio_callback
(
samples
:
np
.
ndarray
):
def
generated_audio_callback
(
samples
:
np
.
ndarray
,
progress
:
float
):
"""This function is called whenever max_num_sentences sentences
have been processed.
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
2e0bcca
...
...
@@ -16,6 +16,8 @@
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/csrc/spoken-language-identification.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"
...
...
@@ -114,7 +116,7 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
return
recognizer
;
}
void
DestroyOnlineRecognizer
(
SherpaOnnxOnlineRecognizer
*
recognizer
)
{
void
DestroyOnlineRecognizer
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
)
{
delete
recognizer
;
}
...
...
@@ -132,25 +134,28 @@ SherpaOnnxOnlineStream *CreateOnlineStreamWithHotwords(
return
stream
;
}
void
DestroyOnlineStream
(
SherpaOnnxOnlineStream
*
stream
)
{
delete
stream
;
}
void
DestroyOnlineStream
(
const
SherpaOnnxOnlineStream
*
stream
)
{
delete
stream
;
}
void
AcceptWaveform
(
SherpaOnnxOnlineStream
*
stream
,
int32_t
sample_rate
,
void
AcceptWaveform
(
const
SherpaOnnxOnlineStream
*
stream
,
int32_t
sample_rate
,
const
float
*
samples
,
int32_t
n
)
{
stream
->
impl
->
AcceptWaveform
(
sample_rate
,
samples
,
n
);
}
int32_t
IsOnlineStreamReady
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
)
{
int32_t
IsOnlineStreamReady
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
)
{
return
recognizer
->
impl
->
IsReady
(
stream
->
impl
.
get
());
}
void
DecodeOnlineStream
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
)
{
void
DecodeOnlineStream
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
)
{
recognizer
->
impl
->
DecodeStream
(
stream
->
impl
.
get
());
}
void
DecodeMultipleOnlineStreams
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
**
streams
,
int32_t
n
)
{
void
DecodeMultipleOnlineStreams
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
**
streams
,
int32_t
n
)
{
std
::
vector
<
sherpa_onnx
::
OnlineStream
*>
ss
(
n
);
for
(
int32_t
i
=
0
;
i
!=
n
;
++
i
)
{
ss
[
i
]
=
streams
[
i
]
->
impl
.
get
();
...
...
@@ -159,7 +164,8 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer,
}
const
SherpaOnnxOnlineRecognizerResult
*
GetOnlineStreamResult
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
)
{
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
)
{
sherpa_onnx
::
OnlineRecognizerResult
result
=
recognizer
->
impl
->
GetResult
(
stream
->
impl
.
get
());
const
auto
&
text
=
result
.
text
;
...
...
@@ -232,29 +238,30 @@ void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) {
}
}
void
Reset
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
)
{
void
Reset
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
)
{
recognizer
->
impl
->
Reset
(
stream
->
impl
.
get
());
}
void
InputFinished
(
SherpaOnnxOnlineStream
*
stream
)
{
void
InputFinished
(
const
SherpaOnnxOnlineStream
*
stream
)
{
stream
->
impl
->
InputFinished
();
}
int32_t
IsEndpoint
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
)
{
int32_t
IsEndpoint
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
)
{
return
recognizer
->
impl
->
IsEndpoint
(
stream
->
impl
.
get
());
}
SherpaOnnxDisplay
*
CreateDisplay
(
int32_t
max_word_per_line
)
{
const
SherpaOnnxDisplay
*
CreateDisplay
(
int32_t
max_word_per_line
)
{
SherpaOnnxDisplay
*
ans
=
new
SherpaOnnxDisplay
;
ans
->
impl
=
std
::
make_unique
<
sherpa_onnx
::
Display
>
(
max_word_per_line
);
return
ans
;
}
void
DestroyDisplay
(
SherpaOnnxDisplay
*
display
)
{
delete
display
;
}
void
DestroyDisplay
(
const
SherpaOnnxDisplay
*
display
)
{
delete
display
;
}
void
SherpaOnnxPrint
(
SherpaOnnxDisplay
*
display
,
int32_t
idx
,
const
char
*
s
)
{
void
SherpaOnnxPrint
(
const
SherpaOnnxDisplay
*
display
,
int32_t
idx
,
const
char
*
s
)
{
display
->
impl
->
Print
(
idx
,
s
);
}
...
...
@@ -808,9 +815,8 @@ int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) {
}
static
const
SherpaOnnxGeneratedAudio
*
SherpaOnnxOfflineTtsGenerateInternal
(
const
SherpaOnnxOfflineTts
*
tts
,
const
char
*
text
,
int32_t
sid
,
float
speed
,
std
::
function
<
void
(
const
float
*
,
int32_t
,
float
)
>
callback
)
{
const
SherpaOnnxOfflineTts
*
tts
,
const
char
*
text
,
int32_t
sid
,
float
speed
,
std
::
function
<
void
(
const
float
*
,
int32_t
,
float
)
>
callback
)
{
sherpa_onnx
::
GeneratedAudio
audio
=
tts
->
impl
->
Generate
(
text
,
sid
,
speed
,
callback
);
...
...
@@ -833,36 +839,37 @@ static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
const
SherpaOnnxGeneratedAudio
*
SherpaOnnxOfflineTtsGenerate
(
const
SherpaOnnxOfflineTts
*
tts
,
const
char
*
text
,
int32_t
sid
,
float
speed
)
{
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
nullptr
);
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
nullptr
);
}
const
SherpaOnnxGeneratedAudio
*
SherpaOnnxOfflineTtsGenerateWithCallback
(
const
SherpaOnnxOfflineTts
*
tts
,
const
char
*
text
,
int32_t
sid
,
float
speed
,
SherpaOnnxGeneratedAudioCallback
callback
)
{
auto
wrapper
=
[
callback
](
const
float
*
samples
,
int32_t
n
,
float
/*progress*/
)
{
callback
(
samples
,
n
);
};
auto
wrapper
=
[
callback
](
const
float
*
samples
,
int32_t
n
,
float
/*progress*/
)
{
callback
(
samples
,
n
);
};
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
wrapper
);
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
wrapper
);
}
const
SherpaOnnxGeneratedAudio
*
SherpaOnnxOfflineTtsGenerateWithProgressCallback
(
const
SherpaOnnxGeneratedAudio
*
SherpaOnnxOfflineTtsGenerateWithProgressCallback
(
const
SherpaOnnxOfflineTts
*
tts
,
const
char
*
text
,
int32_t
sid
,
float
speed
,
SherpaOnnxGeneratedAudioProgressCallback
callback
)
{
auto
wrapper
=
[
callback
](
const
float
*
samples
,
int32_t
n
,
float
progress
)
{
callback
(
samples
,
n
,
progress
);
callback
(
samples
,
n
,
progress
);
};
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
wrapper
);
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
wrapper
);
}
const
SherpaOnnxGeneratedAudio
*
SherpaOnnxOfflineTtsGenerateWithCallbackWithArg
(
const
SherpaOnnxOfflineTts
*
tts
,
const
char
*
text
,
int32_t
sid
,
float
speed
,
SherpaOnnxGeneratedAudioCallbackWithArg
callback
,
void
*
arg
)
{
auto
wrapper
=
[
callback
,
arg
](
const
float
*
samples
,
int32_t
n
,
float
/*progress*/
)
{
auto
wrapper
=
[
callback
,
arg
](
const
float
*
samples
,
int32_t
n
,
float
/*progress*/
)
{
callback
(
samples
,
n
,
arg
);
};
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
wrapper
);
return
SherpaOnnxOfflineTtsGenerateInternal
(
tts
,
text
,
sid
,
speed
,
wrapper
);
}
void
SherpaOnnxDestroyOfflineTtsGeneratedAudio
(
...
...
@@ -972,3 +979,200 @@ void SherpaOnnxDestroySpokenLanguageIdentificationResult(
delete
r
;
}
}
// Opaque handle exposed through the C API. It owns the C++
// sherpa_onnx::SpeakerEmbeddingExtractor that performs the actual work.
struct SherpaOnnxSpeakerEmbeddingExtractor {
  std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl;
};
// Creates a speaker embedding extractor from the given C config.
//
// Each config field goes through SHERPA_ONNX_OR with a fallback value
// ("" for model, 1 for num_threads, 0 for debug, "cpu" for provider).
// When config->debug is non-zero the resulting C++ config is logged.
//
// @param config  Must not be NULL; it is dereferenced unconditionally.
// @return A newly allocated extractor, or nullptr if the config fails
//         validation. Free it with
//         SherpaOnnxDestroySpeakerEmbeddingExtractor().
const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractor(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
  sherpa_onnx::SpeakerEmbeddingExtractorConfig cfg;

  cfg.model = SHERPA_ONNX_OR(config->model, "");
  cfg.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  cfg.debug = SHERPA_ONNX_OR(config->debug, 0);
  cfg.provider = SHERPA_ONNX_OR(config->provider, "cpu");

  if (config->debug) {
    SHERPA_ONNX_LOGE("%s\n", cfg.ToString().c_str());
  }

  if (cfg.Validate()) {
    auto *extractor = new SherpaOnnxSpeakerEmbeddingExtractor;
    extractor->impl =
        std::make_unique<sherpa_onnx::SpeakerEmbeddingExtractor>(cfg);
    return extractor;
  }

  SHERPA_ONNX_LOGE("Errors in config!");
  return nullptr;
}
// Destroys an extractor handle and, through its unique_ptr member, the
// wrapped C++ extractor.
//
// @param p  Handle to delete. Deleting a null pointer is a no-op in C++.
void SherpaOnnxDestroySpeakerEmbeddingExtractor(
    const SherpaOnnxSpeakerEmbeddingExtractor *p) {
  delete p;
}
// Returns Dim() of the wrapped extractor (presumably the length of the
// embedding vectors it produces -- confirm against the C++ class).
//
// @param p  Must not be NULL; it is dereferenced unconditionally.
int32_t SherpaOnnxSpeakerEmbeddingExtractorDim(
    const SherpaOnnxSpeakerEmbeddingExtractor *p) {
  return p->impl->Dim();
}
// Wraps a stream created by the extractor in a newly allocated
// SherpaOnnxOnlineStream handle.
//
// @param p  Must not be NULL; it is dereferenced unconditionally.
// @return A heap-allocated stream handle owned by the caller.
const SherpaOnnxOnlineStream *SherpaOnnxSpeakerEmbeddingExtractorCreateStream(
    const SherpaOnnxSpeakerEmbeddingExtractor *p) {
  return new SherpaOnnxOnlineStream(p->impl->CreateStream());
}
// Forwards to IsReady() of the wrapped extractor for the given stream and
// returns its result converted to int32_t.
//
// @param p  Extractor handle; must not be NULL.
// @param s  Stream handle; must not be NULL.
int32_t SherpaOnnxSpeakerEmbeddingExtractorIsReady(
    const SherpaOnnxSpeakerEmbeddingExtractor *p,
    const SherpaOnnxOnlineStream *s) {
  return p->impl->IsReady(s->impl.get());
}
// Computes the embedding for stream `s` and returns a heap-allocated copy.
//
// @param p  Extractor handle; must not be NULL.
// @param s  Stream handle; must not be NULL.
// @return An array allocated with new[] holding every element returned by
//         Compute(). Release it with
//         SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding().
const float *SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(
    const SherpaOnnxSpeakerEmbeddingExtractor *p,
    const SherpaOnnxOnlineStream *s) {
  const std::vector<float> embedding = p->impl->Compute(s->impl.get());

  const auto length = embedding.size();
  float *out = new float[length];
  for (auto k = length - length; k != length; ++k) {
    out[k] = embedding[k];
  }

  return out;
}
void
SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding
(
const
float
*
v
)
{
delete
[]
v
;
}
// Opaque handle exposed through the C API. It owns the C++
// sherpa_onnx::SpeakerEmbeddingManager that stores the registered speakers.
struct SherpaOnnxSpeakerEmbeddingManager {
  std::unique_ptr<sherpa_onnx::SpeakerEmbeddingManager> impl;
};
// Allocates a manager handle wrapping a C++ SpeakerEmbeddingManager that is
// constructed with `dim`.
//
// @param dim  Passed unchanged to the SpeakerEmbeddingManager constructor.
// @return A heap-allocated handle. Free it with
//         SherpaOnnxDestroySpeakerEmbeddingManager().
const SherpaOnnxSpeakerEmbeddingManager *
SherpaOnnxCreateSpeakerEmbeddingManager(int32_t dim) {
  auto *manager = new SherpaOnnxSpeakerEmbeddingManager;
  manager->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingManager>(dim);

  return manager;
}
// Destroys a manager handle and, through its unique_ptr member, the wrapped
// C++ manager.
//
// @param p  Handle to delete. Deleting a null pointer is a no-op in C++.
void SherpaOnnxDestroySpeakerEmbeddingManager(
    const SherpaOnnxSpeakerEmbeddingManager *p) {
  delete p;
}
// Forwards a single embedding for speaker `name` to Add() of the wrapped
// manager and returns its result converted to int32_t.
//
// @param p     Manager handle; must not be NULL.
// @param name  Speaker name.
// @param v     Pointer to the embedding. Its length is not passed here;
//              presumably it must match the manager's dimension -- confirm
//              against the C++ class.
int32_t SherpaOnnxSpeakerEmbeddingManagerAdd(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v) {
  return p->impl->Add(name, v);
}
// Registers speaker `name` using several embeddings at once.
//
// @param p     Manager handle; must not be NULL.
// @param name  Speaker name.
// @param v     NULL-terminated array of embedding pointers. Dim() floats are
//              read from each entry.
// @return 0 if `v` is NULL or its first entry is NULL; otherwise the result
//         of Add() on the wrapped manager.
int32_t SherpaOnnxSpeakerEmbeddingManagerAddList(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float **v) {
  // Count entries up to (not including) the terminating NULL.
  int32_t count = 0;
  if (v != nullptr) {
    for (const float **it = v; *it != nullptr; ++it) {
      ++count;
    }
  }

  if (count == 0) {
    SHERPA_ONNX_LOGE("Empty embedding!");
    return 0;
  }

  std::vector<std::vector<float>> embeddings(count);

  const int32_t dim = p->impl->Dim();

  int32_t k = 0;
  for (auto &e : embeddings) {
    const float *begin = v[k];
    e = std::vector<float>(begin, begin + dim);
    ++k;
  }

  return p->impl->Add(name, embeddings);
}
// Registers speaker `name` using `n` embeddings stored back to back in `v`.
//
// @param p     Manager handle; must not be NULL.
// @param name  Speaker name.
// @param v     Contiguous buffer holding n * Dim() floats; embedding i
//              occupies [v + i * Dim(), v + (i + 1) * Dim()).
// @param n     Number of embeddings in `v`.
// @return 0 if `v` is NULL or `n` is not positive; otherwise the result of
//         Add() on the wrapped manager.
int32_t SherpaOnnxSpeakerEmbeddingManagerAddListFlattened(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v, int32_t n) {
  // Reject empty input the same way SherpaOnnxSpeakerEmbeddingManagerAddList
  // does. A negative n would otherwise be converted to a huge size_t and
  // make the vector allocation throw across the C boundary.
  if (v == nullptr || n <= 0) {
    SHERPA_ONNX_LOGE("Empty embedding!");
    return 0;
  }

  const int32_t dim = p->impl->Dim();

  std::vector<std::vector<float>> vec;
  vec.reserve(n);

  for (int32_t i = 0; i != n; ++i, v += dim) {
    vec.emplace_back(v, v + dim);
  }

  return p->impl->Add(name, vec);
}
// Forwards to Remove() of the wrapped manager for speaker `name` and returns
// its result converted to int32_t.
//
// @param p     Manager handle; must not be NULL.
// @param name  Name of the speaker to remove.
int32_t SherpaOnnxSpeakerEmbeddingManagerRemove(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name) {
  return p->impl->Remove(name);
}
// Looks up the speaker matching embedding `v` under the given threshold.
//
// @param p          Manager handle; must not be NULL.
// @param v          Query embedding, forwarded to Search().
// @param threshold  Forwarded to Search().
// @return nullptr when Search() returns an empty result; otherwise a
//         NUL-terminated copy of the name allocated with new[]. Free it with
//         SherpaOnnxSpeakerEmbeddingManagerFreeSearch().
const char *SherpaOnnxSpeakerEmbeddingManagerSearch(
    const SherpaOnnxSpeakerEmbeddingManager *p, const float *v,
    float threshold) {
  auto match = p->impl->Search(v, threshold);
  if (match.empty()) {
    return nullptr;
  }

  const auto length = match.size();

  // One extra byte for the terminating NUL.
  char *out = new char[length + 1];
  std::copy(match.begin(), match.end(), out);
  out[length] = '\0';

  return out;
}
void
SherpaOnnxSpeakerEmbeddingManagerFreeSearch
(
const
char
*
name
)
{
delete
[]
name
;
}
// Forwards to Verify() of the wrapped manager and returns its result
// converted to int32_t.
//
// @param p          Manager handle; must not be NULL.
// @param name       Speaker name to verify against.
// @param v          Query embedding.
// @param threshold  Forwarded to Verify().
int32_t SherpaOnnxSpeakerEmbeddingManagerVerify(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name,
    const float *v, float threshold) {
  return p->impl->Verify(name, v, threshold);
}
// Forwards to Contains() of the wrapped manager for speaker `name` and
// returns its result converted to int32_t.
//
// @param p     Manager handle; must not be NULL.
// @param name  Speaker name to look up.
int32_t SherpaOnnxSpeakerEmbeddingManagerContains(
    const SherpaOnnxSpeakerEmbeddingManager *p, const char *name) {
  return p->impl->Contains(name);
}
// Returns NumSpeakers() of the wrapped manager.
//
// @param p  Manager handle; must not be NULL.
int32_t SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(
    const SherpaOnnxSpeakerEmbeddingManager *p) {
  return p->impl->NumSpeakers();
}
// Returns the names of all registered speakers as a NULL-terminated array of
// NUL-terminated strings.
//
// @param manager  Manager handle; must not be NULL.
// @return An array allocated with new[] whose entries are also allocated
//         with new[]. The array is never NULL: with no speakers it contains
//         only the terminating NULL entry. Free it with
//         SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers().
const char *const *SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(
    const SherpaOnnxSpeakerEmbeddingManager *manager) {
  const std::vector<std::string> speakers = manager->impl->GetAllSpeakers();
  const int32_t count = speakers.size();

  // One extra slot for the NULL sentinel that marks the end of the list.
  char **out = new char *[count + 1];
  out[count] = nullptr;

  for (int32_t k = 0; k != count; ++k) {
    const std::string &s = speakers[k];
    const auto length = s.size();

    out[k] = new char[length + 1];
    std::copy(s.begin(), s.end(), out[k]);
    out[k][length] = '\0';
  }

  return out;
}
// Frees a NULL-terminated array of strings in which both the array and every
// string were allocated with new[], as
// SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers() does.
//
// @param names  Array to free. Passing a null pointer is a no-op.
void SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(
    const char *const *names) {
  if (names != nullptr) {
    // Release each string up to the NULL sentinel.
    for (const char *const *it = names; *it != nullptr; ++it) {
      delete[] *it;
    }
  }

  delete[] names;
}
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
2e0bcca
...
...
@@ -186,7 +186,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
///
/// @param p A pointer returned by CreateOnlineRecognizer()
SHERPA_ONNX_API
void
DestroyOnlineRecognizer
(
SherpaOnnxOnlineRecognizer
*
recognizer
);
const
SherpaOnnxOnlineRecognizer
*
recognizer
);
/// Create an online stream for accepting wave samples.
///
...
...
@@ -208,7 +208,7 @@ SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineStreamWithHotwords(
/// Destroy an online stream.
///
/// @param stream A pointer returned by CreateOnlineStream()
SHERPA_ONNX_API
void
DestroyOnlineStream
(
SherpaOnnxOnlineStream
*
stream
);
SHERPA_ONNX_API
void
DestroyOnlineStream
(
const
SherpaOnnxOnlineStream
*
stream
);
/// Accept input audio samples and compute the features.
/// The user has to invoke DecodeOnlineStream() to run the neural network and
...
...
@@ -221,7 +221,7 @@ SHERPA_ONNX_API void DestroyOnlineStream(SherpaOnnxOnlineStream *stream);
/// @param samples A pointer to a 1-D array containing audio samples.
/// The range of samples has to be normalized to [-1, 1].
/// @param n Number of elements in the samples array.
SHERPA_ONNX_API
void
AcceptWaveform
(
SherpaOnnxOnlineStream
*
stream
,
SHERPA_ONNX_API
void
AcceptWaveform
(
const
SherpaOnnxOnlineStream
*
stream
,
int32_t
sample_rate
,
const
float
*
samples
,
int32_t
n
);
...
...
@@ -230,8 +230,9 @@ SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream,
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer
/// @param stream A pointer returned by CreateOnlineStream
SHERPA_ONNX_API
int32_t
IsOnlineStreamReady
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
);
SHERPA_ONNX_API
int32_t
IsOnlineStreamReady
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
);
/// Call this function to run the neural network model and decoding.
//
...
...
@@ -243,8 +244,9 @@ SHERPA_ONNX_API int32_t IsOnlineStreamReady(
/// DecodeOnlineStream(recognizer, stream);
/// }
///
SHERPA_ONNX_API
void
DecodeOnlineStream
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
);
SHERPA_ONNX_API
void
DecodeOnlineStream
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
);
/// This function is similar to DecodeOnlineStream(). It decodes multiple
/// OnlineStream in parallel.
...
...
@@ -257,8 +259,8 @@ SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer,
/// CreateOnlineRecognizer()
/// @param n Number of elements in the given streams array.
SHERPA_ONNX_API
void
DecodeMultipleOnlineStreams
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
**
streams
,
int32_t
n
);
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
**
streams
,
int32_t
n
);
/// Get the decoding results so far for an OnlineStream.
///
...
...
@@ -268,7 +270,8 @@ SHERPA_ONNX_API void DecodeMultipleOnlineStreams(
/// DestroyOnlineRecognizerResult() to free the returned pointer to
/// avoid memory leak.
SHERPA_ONNX_API
const
SherpaOnnxOnlineRecognizerResult
*
GetOnlineStreamResult
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
);
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
);
/// Destroy the pointer returned by GetOnlineStreamResult().
///
...
...
@@ -281,35 +284,36 @@ SHERPA_ONNX_API void DestroyOnlineRecognizerResult(
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer().
/// @param stream A pointer returned by CreateOnlineStream
SHERPA_ONNX_API
void
Reset
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
);
SHERPA_ONNX_API
void
Reset
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
);
/// Signal that no more audio samples would be available.
/// After this call, you cannot call AcceptWaveform() any more.
///
/// @param stream A pointer returned by CreateOnlineStream()
SHERPA_ONNX_API
void
InputFinished
(
SherpaOnnxOnlineStream
*
stream
);
SHERPA_ONNX_API
void
InputFinished
(
const
SherpaOnnxOnlineStream
*
stream
);
/// Return 1 if an endpoint has been detected.
///
/// @param recognizer A pointer returned by CreateOnlineRecognizer()
/// @param stream A pointer returned by CreateOnlineStream()
/// @return Return 1 if an endpoint is detected. Return 0 otherwise.
SHERPA_ONNX_API
int32_t
IsEndpoint
(
SherpaOnnxOnlineRecognizer
*
recognizer
,
SherpaOnnxOnlineStream
*
stream
);
SHERPA_ONNX_API
int32_t
IsEndpoint
(
const
SherpaOnnxOnlineRecognizer
*
recognizer
,
const
SherpaOnnxOnlineStream
*
stream
);
// for displaying results on Linux/macOS.
SHERPA_ONNX_API
typedef
struct
SherpaOnnxDisplay
SherpaOnnxDisplay
;
/// Create a display object. Must be freed using DestroyDisplay to avoid
/// memory leak.
SHERPA_ONNX_API
SherpaOnnxDisplay
*
CreateDisplay
(
int32_t
max_word_per_line
);
SHERPA_ONNX_API
const
SherpaOnnxDisplay
*
CreateDisplay
(
int32_t
max_word_per_line
);
SHERPA_ONNX_API
void
DestroyDisplay
(
SherpaOnnxDisplay
*
display
);
SHERPA_ONNX_API
void
DestroyDisplay
(
const
SherpaOnnxDisplay
*
display
);
/// Print the result.
SHERPA_ONNX_API
void
SherpaOnnxPrint
(
SherpaOnnxDisplay
*
display
,
int32_t
idx
,
const
char
*
s
);
SHERPA_ONNX_API
void
SherpaOnnxPrint
(
const
SherpaOnnxDisplay
*
display
,
int32_t
idx
,
const
char
*
s
);
// ============================================================
// For offline ASR (i.e., non-streaming ASR)
// ============================================================
...
...
@@ -769,7 +773,7 @@ typedef void (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
int32_t
n
,
void
*
arg
);
typedef
void
(
*
SherpaOnnxGeneratedAudioProgressCallback
)(
const
float
*
samples
,
int32_t
n
,
float
p
);
int32_t
n
,
float
p
);
SHERPA_ONNX_API
typedef
struct
SherpaOnnxOfflineTts
SherpaOnnxOfflineTts
;
...
...
@@ -839,7 +843,9 @@ SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);
SHERPA_ONNX_API
void
SherpaOnnxFreeWave
(
const
SherpaOnnxWave
*
wave
);
// Spoken language identification
// ============================================================
// For spoken language identification
// ============================================================
SHERPA_ONNX_API
typedef
struct
SherpaOnnxSpokenLanguageIdentificationWhisperConfig
{
...
...
@@ -893,6 +899,169 @@ SherpaOnnxSpokenLanguageIdentificationCompute(
SHERPA_ONNX_API
void
SherpaOnnxDestroySpokenLanguageIdentificationResult
(
const
SherpaOnnxSpokenLanguageIdentificationResult
*
r
);
// ============================================================
// For speaker embedding extraction
// ============================================================
SHERPA_ONNX_API
typedef
struct
SherpaOnnxSpeakerEmbeddingExtractorConfig
{
const
char
*
model
;
int32_t
num_threads
;
int32_t
debug
;
const
char
*
provider
;
}
SherpaOnnxSpeakerEmbeddingExtractorConfig
;
SHERPA_ONNX_API
typedef
struct
SherpaOnnxSpeakerEmbeddingExtractor
SherpaOnnxSpeakerEmbeddingExtractor
;
// The user has to invoke SherpaOnnxDestroySpeakerEmbeddingExtractor()
// to free the returned pointer to avoid memory leak
SHERPA_ONNX_API
const
SherpaOnnxSpeakerEmbeddingExtractor
*
SherpaOnnxCreateSpeakerEmbeddingExtractor
(
const
SherpaOnnxSpeakerEmbeddingExtractorConfig
*
config
);
SHERPA_ONNX_API
void
SherpaOnnxDestroySpeakerEmbeddingExtractor
(
const
SherpaOnnxSpeakerEmbeddingExtractor
*
p
);
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingExtractorDim
(
const
SherpaOnnxSpeakerEmbeddingExtractor
*
p
);
// The user has to invoke DestroyOnlineStream() to free the returned pointer
// to avoid memory leak
SHERPA_ONNX_API
const
SherpaOnnxOnlineStream
*
SherpaOnnxSpeakerEmbeddingExtractorCreateStream
(
const
SherpaOnnxSpeakerEmbeddingExtractor
*
p
);
// Return 1 if the stream has enough feature frames for computing embeddings.
// Return 0 otherwise.
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingExtractorIsReady
(
const
SherpaOnnxSpeakerEmbeddingExtractor
*
p
,
const
SherpaOnnxOnlineStream
*
s
);
// Compute the embedding of the stream.
//
// @return Return a pointer pointing to an array containing the embedding.
// The length of the array is `dim` as returned by
// SherpaOnnxSpeakerEmbeddingExtractorDim(p)
//
// The user has to invoke SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding()
// to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API
const
float
*
SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding
(
const
SherpaOnnxSpeakerEmbeddingExtractor
*
p
,
const
SherpaOnnxOnlineStream
*
s
);
SHERPA_ONNX_API
void
SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding
(
const
float
*
v
);
SHERPA_ONNX_API
typedef
struct
SherpaOnnxSpeakerEmbeddingManager
SherpaOnnxSpeakerEmbeddingManager
;
// The user has to invoke SherpaOnnxDestroySpeakerEmbeddingManager()
// to free the returned pointer to avoid memory leak
SHERPA_ONNX_API
const
SherpaOnnxSpeakerEmbeddingManager
*
SherpaOnnxCreateSpeakerEmbeddingManager
(
int32_t
dim
);
SHERPA_ONNX_API
void
SherpaOnnxDestroySpeakerEmbeddingManager
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
);
// Register the embedding of a user
//
// @param name The name of the user
// @param v Pointer to an array containing the embeddings. The length of the
// array must be equal to `dim` used to construct the manager `p`.
//
// @return Return 1 if added successfully. Return 0 on error
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingManagerAdd
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
,
const
char
*
name
,
const
float
*
v
);
// @param v Pointer to an array of embeddings. If there are n embeddings, then
// v[0] is the pointer to the 0-th array containing the embeddings
// v[1] is the pointer to the 1-st array containing the embeddings
// v[n-1] is the pointer to the last array containing the embeddings
// v[n] is a NULL pointer
// @return Return 1 if added successfully. Return 0 on error
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingManagerAddList
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
,
const
char
*
name
,
const
float
**
v
);
// Similar to SherpaOnnxSpeakerEmbeddingManagerAddList() but the memory
// is flattened.
//
// The length of the input array should be `n * dim`.
//
// @return Return 1 if added successfully. Return 0 on error
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingManagerAddListFlattened
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
,
const
char
*
name
,
const
float
*
v
,
int32_t
n
);
// Remove a user.
// @param name The name of the user to remove.
// @return Return 1 if removed successfully; return 0 on error.
//
// Note if the user does not exist, it also returns 0.
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingManagerRemove
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
,
const
char
*
name
);
// Search whether an existing user's embedding matches the given one.
//
// @param v Pointer to an array containing the embedding. The length
// of the array must be equal to `dim` used to construct the manager `p`.
// @param threshold A value between 0 and 1. If the similarity score exceeds
// this threshold, we say a match is found.
// @return Returns the name of the user if found. Return NULL if not found.
// If not NULL, the caller has to invoke
// SherpaOnnxSpeakerEmbeddingManagerFreeSearch() to free the returned
// pointer to avoid memory leak.
SHERPA_ONNX_API
const
char
*
SherpaOnnxSpeakerEmbeddingManagerSearch
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
,
const
float
*
v
,
float
threshold
);
SHERPA_ONNX_API
void
SherpaOnnxSpeakerEmbeddingManagerFreeSearch
(
const
char
*
name
);
// Check whether the input embedding matches the embedding of the input
// speaker.
//
// It is for speaker verification.
//
// @param name The target speaker name.
// @param v The input embedding to check.
// @param threshold A value between 0 and 1.
// @return Return 1 if it matches. Otherwise, it returns 0.
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingManagerVerify
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
,
const
char
*
name
,
const
float
*
v
,
float
threshold
);
// Return 1 if the user with the name is in the manager.
// Return 0 if the user does not exist.
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingManagerContains
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
,
const
char
*
name
);
// Return number of speakers in the manager.
SHERPA_ONNX_API
int32_t
SherpaOnnxSpeakerEmbeddingManagerNumSpeakers
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
);
// Return the name of all speakers in the manager.
//
// @return Return an array of pointers `ans`. If there are n speakers, then
// - ans[0] contains the name of the 0-th speaker
// - ans[1] contains the name of the 1-st speaker
// - ans[n-1] contains the name of the last speaker
// - ans[n] is NULL
// If there are no users at all, then ans[0] is NULL. In any case,
// `ans` is not NULL.
//
// Each name is NULL-terminated
//
// The caller has to invoke SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers()
// to free the returned pointer to avoid memory leak.
SHERPA_ONNX_API
const
char
*
const
*
SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers
(
const
SherpaOnnxSpeakerEmbeddingManager
*
p
);
SHERPA_ONNX_API
void
SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers
(
const
char
*
const
*
names
);
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
...
...
sherpa-onnx/csrc/offline-tts-vits-impl.h
查看文件 @
2e0bcca
...
...
@@ -168,7 +168,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
ans
.
samples
.
insert
(
ans
.
samples
.
end
(),
audio
.
samples
.
begin
(),
audio
.
samples
.
end
());
if
(
callback
)
{
callback
(
audio
.
samples
.
data
(),
audio
.
samples
.
size
(),
b
*
1
.
0
/
num_batches
);
callback
(
audio
.
samples
.
data
(),
audio
.
samples
.
size
(),
b
*
1
.
0
/
num_batches
);
// Caution(fangjun): audio is freed when the callback returns, so users
// should copy the data if they want to access the data after
// the callback returns to avoid segmentation fault.
...
...
sherpa-onnx/csrc/offline-tts.h
查看文件 @
2e0bcca
...
...
@@ -54,8 +54,8 @@ struct GeneratedAudio {
class
OfflineTtsImpl
;
using
GeneratedAudioCallback
=
std
::
function
<
void
(
const
float
*
/*samples*/
,
int32_t
/*n*/
,
float
/*progress*/
)
>
;
using
GeneratedAudioCallback
=
std
::
function
<
void
(
const
float
*
/*samples*/
,
int32_t
/*n*/
,
float
/*progress*/
)
>
;
class
OfflineTts
{
public
:
...
...
sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
查看文件 @
2e0bcca
...
...
@@ -44,7 +44,8 @@ static void Handler(int32_t /*sig*/) {
fprintf
(
stderr
,
"
\n
Caught Ctrl + C. Exiting
\n
"
);
}
static
void
AudioGeneratedCallback
(
const
float
*
s
,
int32_t
n
)
{
static
void
AudioGeneratedCallback
(
const
float
*
s
,
int32_t
n
,
float
/*progress*/
)
{
if
(
n
>
0
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_buffer
.
mutex
);
g_buffer
.
samples
.
push
({
s
,
s
+
n
});
...
...
sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
查看文件 @
2e0bcca
...
...
@@ -47,7 +47,8 @@ static void Handler(int32_t /*sig*/) {
fprintf
(
stderr
,
"
\n
Caught Ctrl + C. Exiting
\n
"
);
}
static
void
AudioGeneratedCallback
(
const
float
*
s
,
int32_t
n
,
float
/*progress*/
)
{
static
void
AudioGeneratedCallback
(
const
float
*
s
,
int32_t
n
,
float
/*progress*/
)
{
if
(
n
>
0
)
{
Samples
samples
;
samples
.
data
=
std
::
vector
<
float
>
{
s
,
s
+
n
};
...
...
sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
查看文件 @
2e0bcca
...
...
@@ -9,9 +9,8 @@
#include "sherpa-onnx/csrc/parse-options.h"
#include "sherpa-onnx/csrc/wave-writer.h"
void
audioCallback
(
const
float
*
samples
,
int32_t
n
,
float
progress
)
{
printf
(
"sample=%d, progress=%f
\n
"
,
n
,
progress
);
void
audioCallback
(
const
float
*
samples
,
int32_t
n
,
float
progress
)
{
printf
(
"sample=%d, progress=%f
\n
"
,
n
,
progress
);
}
int
main
(
int32_t
argc
,
char
*
argv
[])
{
...
...
sherpa-onnx/csrc/speaker-embedding-manager.cc
查看文件 @
2e0bcca
...
...
@@ -93,7 +93,7 @@ class SpeakerEmbeddingManager::Impl {
int32_t
num_rows
=
embedding_matrix_
.
rows
();
if
(
row_idx
<
num_rows
-
1
)
{
embedding_matrix_
.
block
(
row_idx
,
0
,
num_rows
-
-
1
-
row_idx
,
dim_
)
=
embedding_matrix_
.
block
(
row_idx
,
0
,
num_rows
-
1
-
row_idx
,
dim_
)
=
embedding_matrix_
.
bottomRows
(
num_rows
-
1
-
row_idx
);
}
...
...
sherpa-onnx/jni/jni.cc
查看文件 @
2e0bcca
...
...
@@ -795,9 +795,10 @@ class SherpaOnnxOfflineTts {
explicit
SherpaOnnxOfflineTts
(
const
OfflineTtsConfig
&
config
)
:
tts_
(
config
)
{}
GeneratedAudio
Generate
(
const
std
::
string
&
text
,
int64_t
sid
=
0
,
float
speed
=
1.0
,
std
::
function
<
void
(
const
float
*
,
int32_t
,
float
)
>
callback
=
nullptr
)
const
{
GeneratedAudio
Generate
(
const
std
::
string
&
text
,
int64_t
sid
=
0
,
float
speed
=
1.0
,
std
::
function
<
void
(
const
float
*
,
int32_t
,
float
)
>
callback
=
nullptr
)
const
{
return
tts_
.
Generate
(
text
,
sid
,
speed
,
callback
);
}
...
...
sherpa-onnx/python/csrc/offline-tts.cc
查看文件 @
2e0bcca
...
...
@@ -55,14 +55,16 @@ void PybindOfflineTts(py::module *m) {
.
def
(
"generate"
,
[](
const
PyClass
&
self
,
const
std
::
string
&
text
,
int64_t
sid
,
float
speed
,
std
::
function
<
void
(
py
::
array_t
<
float
>
,
float
)
>
callback
)
float
speed
,
std
::
function
<
void
(
py
::
array_t
<
float
>
,
float
)
>
callback
)
->
GeneratedAudio
{
if
(
!
callback
)
{
return
self
.
Generate
(
text
,
sid
,
speed
);
}
std
::
function
<
void
(
const
float
*
,
int32_t
,
float
)
>
callback_wrapper
=
[
callback
](
const
float
*
samples
,
int32_t
n
,
float
progress
)
{
std
::
function
<
void
(
const
float
*
,
int32_t
,
float
)
>
callback_wrapper
=
[
callback
](
const
float
*
samples
,
int32_t
n
,
float
progress
)
{
// CAUTION(fangjun): we have to copy samples since it is
// freed once the callback returns.
...
...
请
注册
或
登录
后发表评论