Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-01-03 12:17:26 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-01-03 12:17:26 +0800
Commit
9aa4897a9e31cf78c75c76b30ae10198d893b512
9aa4897a
1 parent
a4365dad
Add C API for MatchaTTS models (#1675)
隐藏空白字符变更
内嵌
并排对比
正在显示
7 个修改的文件
包含
260 行增加
和
3 行删除
.github/workflows/c-api.yaml
c-api-examples/CMakeLists.txt
c-api-examples/matcha-tts-en-c-api.c
c-api-examples/matcha-tts-zh-c-api.c
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/offline-tts.h
.github/workflows/c-api.yaml
查看文件 @
9aa4897
...
...
@@ -81,6 +81,51 @@ jobs:
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
fi
# Build the Chinese MatchaTTS C API example against the installed library,
# fetch the acoustic model and the vocoder, then run the example.
- name: Test Matcha TTS (zh)
  shell: bash
  run: |
    gcc -o matcha-tts-zh-c-api ./c-api-examples/matcha-tts-zh-c-api.c \
      -I ./build/install/include \
      -L ./build/install/lib/ \
      -l sherpa-onnx-c-api \
      -l onnxruntime

    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
    tar xvf matcha-icefall-zh-baker.tar.bz2
    rm matcha-icefall-zh-baker.tar.bz2

    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx

    export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
    export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

    ./matcha-tts-zh-c-api
# Build the English MatchaTTS C API example against the installed library,
# fetch the acoustic model and the vocoder, then run the example.
- name: Test Matcha TTS (en)
  shell: bash
  run: |
    gcc -o matcha-tts-en-c-api ./c-api-examples/matcha-tts-en-c-api.c \
      -I ./build/install/include \
      -L ./build/install/lib/ \
      -l sherpa-onnx-c-api \
      -l onnxruntime

    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
    tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
    rm matcha-icefall-en_US-ljspeech.tar.bz2

    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx

    export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
    export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

    ./matcha-tts-en-c-api
# Upload the wave files matching ./generated-matcha-*.wav as an artifact
# whose name includes the matrix OS.
- uses: actions/upload-artifact@v4
  with:
    name: matcha-tts-${{ matrix.os }}
    path: ./generated-matcha-*.wav
-
name
:
Test vad + Whisper tiny.en
shell
:
bash
run
:
|
...
...
c-api-examples/CMakeLists.txt
查看文件 @
9aa4897
...
...
@@ -7,6 +7,12 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
# TTS examples are only built when TTS support is enabled.
if(SHERPA_ONNX_ENABLE_TTS)
  # Generic offline TTS example; it additionally links against cargs.
  add_executable(offline-tts-c-api offline-tts-c-api.c)
  target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)

  # MatchaTTS examples: one executable per source file of the same name,
  # each linked only against the C API library.
  foreach(matcha_example matcha-tts-zh-c-api matcha-tts-en-c-api)
    add_executable(${matcha_example} ${matcha_example}.c)
    target_link_libraries(${matcha_example} sherpa-onnx-c-api)
  endforeach()
endif()
if
(
SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION
)
...
...
c-api-examples/matcha-tts-en-c-api.c
0 → 100644
查看文件 @
9aa4897
// c-api-examples/matcha-tts-en-c-api.c
//
// Copyright (c) 2025 Xiaomi Corporation
// This file shows how to use sherpa-onnx C API
// for English TTS with MatchaTTS.
//
// clang-format off
/*
Usage
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
./matcha-tts-en-c-api
*/
// clang-format on
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
/**
 * Progress callback passed to
 * SherpaOnnxOfflineTtsGenerateWithProgressCallback() by this example.
 *
 * Prints the current progress, as a percentage, to stderr.
 *
 * @param samples      Unused by this example.
 * @param num_samples  Unused by this example.
 * @param progress     Progress value; it is multiplied by 100 for display.
 * @return Always 1 to continue generating (returning 0 would stop generating).
 */
static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress) {
  // The callback signature is fixed; silence unused-parameter warnings.
  (void)samples;
  (void)num_samples;

  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);

  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}
/**
 * Entry point of the English MatchaTTS example.
 *
 * Builds a zero-initialized SherpaOnnxOfflineTtsConfig pointing at the
 * MatchaTTS acoustic model, the vocoder, the token table and the espeak-ng
 * data directory, synthesizes a fixed English text and writes the result to
 * ./generated-matcha-en.wav.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return 0 on success, EXIT_FAILURE if the TTS object cannot be created,
 *         no audio is generated, or the wave file cannot be written.
 */
int32_t main(int32_t argc, char *argv[]) {
  (void)argc;
  (void)argv;

  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));

  config.model.matcha.acoustic_model =
      "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx";
  config.model.matcha.vocoder = "./hifigan_v2.onnx";
  config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt";
  config.model.matcha.data_dir =
      "./matcha-icefall-en_US-ljspeech/espeak-ng-data";

  config.model.num_threads = 1;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-matcha-en.wav";

  const char *text =
      "Today as always, men fall into two groups: slaves and free men. Whoever "
      "does not have two-thirds of his day for himself, is a slave, whatever "
      "he may be: a statesman, a businessman, an official, or a scholar. "
      "Friends fell out often because life was changing so fast. The easiest "
      "thing in the world was to lose touch with someone.";

  SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  if (tts == NULL) {
    // Do not hand a NULL handle to the generate call below.
    fprintf(stderr, "Failed to create the offline TTS object\n");
    return EXIT_FAILURE;
  }

  int32_t sid = 0;
  float speed = 1.0;  // larger -> faster in speech speed

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
                                                       ProgressCallback);
#endif

  if (audio == NULL) {
    // audio is dereferenced below, so bail out instead of crashing.
    fprintf(stderr, "Failed to generate audio\n");
    SherpaOnnxDestroyOfflineTts(tts);
    return EXIT_FAILURE;
  }

  // NOTE(review): SherpaOnnxWriteWave is expected to return 1 on success and
  // 0 on failure -- confirm against c-api.h.
  int32_t ok = SherpaOnnxWriteWave(audio->samples, audio->n,
                                   audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  if (!ok) {
    fprintf(stderr, "Failed to write: %s\n", filename);
    return EXIT_FAILURE;
  }

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}
...
...
c-api-examples/matcha-tts-zh-c-api.c
0 → 100644
查看文件 @
9aa4897
// c-api-examples/matcha-tts-zh-c-api.c
//
// Copyright (c) 2025 Xiaomi Corporation
// This file shows how to use sherpa-onnx C API
// for Chinese TTS with MatchaTTS.
//
// clang-format off
/*
Usage
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
./matcha-tts-zh-c-api
*/
// clang-format on
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
/**
 * Progress callback passed to
 * SherpaOnnxOfflineTtsGenerateWithProgressCallback() by this example.
 *
 * Prints the current progress, as a percentage, to stderr.
 *
 * @param samples      Unused by this example.
 * @param num_samples  Unused by this example.
 * @param progress     Progress value; it is multiplied by 100 for display.
 * @return Always 1 to continue generating (returning 0 would stop generating).
 */
static int32_t ProgressCallback(const float *samples, int32_t num_samples,
                                float progress) {
  // The callback signature is fixed; silence unused-parameter warnings.
  (void)samples;
  (void)num_samples;

  fprintf(stderr, "Progress: %.3f%%\n", progress * 100);

  // return 1 to continue generating
  // return 0 to stop generating
  return 1;
}
/**
 * Entry point of the Chinese MatchaTTS example.
 *
 * Builds a zero-initialized SherpaOnnxOfflineTtsConfig pointing at the
 * MatchaTTS acoustic model, the vocoder, the lexicon, the token table, the
 * dict directory and the rule FSTs, synthesizes a fixed Chinese text and
 * writes the result to ./generated-matcha-zh.wav.
 *
 * @param argc Unused.
 * @param argv Unused.
 * @return 0 on success, EXIT_FAILURE if the TTS object cannot be created,
 *         no audio is generated, or the wave file cannot be written.
 */
int32_t main(int32_t argc, char *argv[]) {
  (void)argc;
  (void)argv;

  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));

  config.model.matcha.acoustic_model =
      "./matcha-icefall-zh-baker/model-steps-3.onnx";
  config.model.matcha.vocoder = "./hifigan_v2.onnx";
  config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
  config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt";
  config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict";

  config.model.num_threads = 1;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  // clang-format off
  config.rule_fsts = "./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst";
  // clang-format on

  const char *filename = "./generated-matcha-zh.wav";

  const char *text =
      "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
      "涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
      "受着生命的奇迹与温柔."
      "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; "
      "经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。";

  SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  if (tts == NULL) {
    // Do not hand a NULL handle to the generate call below.
    fprintf(stderr, "Failed to create the offline TTS object\n");
    return EXIT_FAILURE;
  }

  int32_t sid = 0;
  float speed = 1.0;  // larger -> faster in speech speed

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
                                                       ProgressCallback);
#endif

  if (audio == NULL) {
    // audio is dereferenced below, so bail out instead of crashing.
    fprintf(stderr, "Failed to generate audio\n");
    SherpaOnnxDestroyOfflineTts(tts);
    return EXIT_FAILURE;
  }

  // NOTE(review): SherpaOnnxWriteWave is expected to return 1 on success and
  // 0 on failure -- confirm against c-api.h.
  int32_t ok = SherpaOnnxWriteWave(audio->samples, audio->n,
                                   audio->sample_rate, filename);

  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  if (!ok) {
    fprintf(stderr, "Failed to write: %s\n", filename);
    return EXIT_FAILURE;
  }

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
9aa4897
...
...
@@ -1058,6 +1058,7 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
const
SherpaOnnxOfflineTtsConfig
*
config
)
{
sherpa_onnx
::
OfflineTtsConfig
tts_config
;
// vits
tts_config
.
model
.
vits
.
model
=
SHERPA_ONNX_OR
(
config
->
model
.
vits
.
model
,
""
);
tts_config
.
model
.
vits
.
lexicon
=
SHERPA_ONNX_OR
(
config
->
model
.
vits
.
lexicon
,
""
);
...
...
@@ -1073,6 +1074,24 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
tts_config
.
model
.
vits
.
dict_dir
=
SHERPA_ONNX_OR
(
config
->
model
.
vits
.
dict_dir
,
""
);
// matcha
tts_config
.
model
.
matcha
.
acoustic_model
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
acoustic_model
,
""
);
tts_config
.
model
.
matcha
.
vocoder
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
vocoder
,
""
);
tts_config
.
model
.
matcha
.
lexicon
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
lexicon
,
""
);
tts_config
.
model
.
matcha
.
tokens
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
tokens
,
""
);
tts_config
.
model
.
matcha
.
data_dir
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
data_dir
,
""
);
tts_config
.
model
.
matcha
.
noise_scale
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
noise_scale
,
0.667
);
tts_config
.
model
.
matcha
.
length_scale
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
length_scale
,
1.0
);
tts_config
.
model
.
matcha
.
dict_dir
=
SHERPA_ONNX_OR
(
config
->
model
.
matcha
.
dict_dir
,
""
);
tts_config
.
model
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model
.
num_threads
,
1
);
tts_config
.
model
.
debug
=
config
->
model
.
debug
;
tts_config
.
model
.
provider
=
SHERPA_ONNX_OR
(
config
->
model
.
provider
,
"cpu"
);
...
...
@@ -1082,7 +1101,7 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
tts_config
.
rule_fsts
=
SHERPA_ONNX_OR
(
config
->
rule_fsts
,
""
);
tts_config
.
rule_fars
=
SHERPA_ONNX_OR
(
config
->
rule_fars
,
""
);
tts_config
.
max_num_sentences
=
SHERPA_ONNX_OR
(
config
->
max_num_sentences
,
2
);
tts_config
.
max_num_sentences
=
SHERPA_ONNX_OR
(
config
->
max_num_sentences
,
1
);
if
(
tts_config
.
model
.
debug
)
{
#if __OHOS__
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
9aa4897
...
...
@@ -894,15 +894,28 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsVitsModelConfig {
float
noise_scale
;
float
noise_scale_w
;
float
length_scale
;
// < 1, faster in speed; > 1, slower in speed
float
length_scale
;
// < 1, faster in speech speed; > 1, slower in speed
const
char
*
dict_dir
;
}
SherpaOnnxOfflineTtsVitsModelConfig
;
// Configuration of a MatchaTTS model as exposed through the C API.
//
// The members are declared in the same order as before; keep that order,
// since callers fill this struct in directly.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsMatchaModelConfig {
  // String settings.
  const char *acoustic_model;
  const char *vocoder;
  const char *lexicon;
  const char *tokens;
  const char *data_dir;

  // Numeric settings.
  float noise_scale;
  float length_scale;  // < 1, faster in speech speed; > 1, slower in speed

  const char *dict_dir;
} SherpaOnnxOfflineTtsMatchaModelConfig;
// Model-level configuration for offline TTS: per-model-type settings
// (vits, matcha) together with the settings shared by them.
//
// NOTE(review): `matcha` is the last member, after the pre-existing ones --
// presumably so the offsets of the earlier members do not change; confirm
// before reordering.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
  SherpaOnnxOfflineTtsVitsModelConfig vits;

  int32_t num_threads;
  int32_t debug;
  const char *provider;

  SherpaOnnxOfflineTtsMatchaModelConfig matcha;
} SherpaOnnxOfflineTtsModelConfig;
SHERPA_ONNX_API
typedef
struct
SherpaOnnxOfflineTtsConfig
{
...
...
sherpa-onnx/csrc/offline-tts.h
查看文件 @
9aa4897
...
...
@@ -30,7 +30,7 @@ struct OfflineTtsConfig {
// Maximum number of sentences that we process at a time.
// This is to avoid OOM for very long input text.
// If you set it to -1, then we process all sentences in a single batch.
int32_t
max_num_sentences
=
2
;
int32_t
max_num_sentences
=
1
;
OfflineTtsConfig
()
=
default
;
OfflineTtsConfig
(
const
OfflineTtsModelConfig
&
model
,
...
...
请
注册
或
登录
后发表评论