Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-06-20 11:22:52 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-06-20 11:22:52 +0800
Commit
6982b86c6617cb2dbce9953bfb29c9546af7b3c8
6982b86c
1 parent
a6095f5f
Support extra languages in multi-lang kokoro tts (#2303)
隐藏空白字符变更
内嵌
并排对比
正在显示
28 个修改的文件
包含
187 行增加
和
49 行删除
.github/workflows/test-build-wheel.yaml
README.md
flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
flutter/sherpa_onnx/lib/src/tts.dart
harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc
harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets
scripts/dotnet/OfflineTtsKokoroModelConfig.cs
scripts/go/sherpa_onnx.go
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/c-api/cxx-api.cc
sherpa-onnx/c-api/cxx-api.h
sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc
sherpa-onnx/csrc/offline-speech-denoiser-model-config.h
sherpa-onnx/csrc/offline-tts-frontend.h
sherpa-onnx/csrc/offline-tts-kokoro-impl.h
sherpa-onnx/csrc/offline-tts-kokoro-model-config.cc
sherpa-onnx/csrc/offline-tts-kokoro-model-config.h
sherpa-onnx/csrc/piper-phonemize-lexicon.cc
sherpa-onnx/csrc/piper-phonemize-lexicon.h
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig.java
sherpa-onnx/jni/offline-tts.cc
sherpa-onnx/kotlin-api/Tts.kt
sherpa-onnx/pascal-api/sherpa_onnx.pas
sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.cc
swift-api-examples/SherpaOnnx.swift
wasm/tts/sherpa-onnx-tts.js
wasm/tts/sherpa-onnx-wasm-main-tts.cc
.github/workflows/test-build-wheel.yaml
查看文件 @
6982b86
...
...
@@ -35,18 +35,18 @@ jobs:
matrix
:
# See https://github.com/actions/runner-images
include
:
-
os
:
ubuntu-22.04
python-version
:
"
3.7"
-
os
:
ubuntu-22.04
-
os
:
ubuntu-latest
python-version
:
"
3.8"
-
os
:
ubuntu-
22.04
-
os
:
ubuntu-
latest
python-version
:
"
3.9"
-
os
:
ubuntu-
22.04
-
os
:
ubuntu-
latest
python-version
:
"
3.10"
-
os
:
ubuntu-
22.04
-
os
:
ubuntu-
latest
python-version
:
"
3.11"
-
os
:
ubuntu-
22.04
-
os
:
ubuntu-
latest
python-version
:
"
3.12"
-
os
:
ubuntu-latest
python-version
:
"
3.13"
-
os
:
macos-13
python-version
:
"
3.8"
...
...
@@ -103,7 +103,7 @@ jobs:
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
cmake --version
export SHERPA_ONNX_MAKE_ARGS="VERBOSE=1 -j"
export SHERPA_ONNX_MAKE_ARGS="VERBOSE=1 -j
2
"
python3 setup.py bdist_wheel
ls -lh dist
...
...
README.md
查看文件 @
6982b86
### Supported functions
|Speech recognition|
Speech synthesis | Source separation
|
|Speech recognition|
[
Speech synthesis
][
tts-url
]
|
[
Source separation
][
ss-url
]
|
|------------------|------------------|-------------------|
| ✔️ | ✔️ | ✔️ |
|Speaker identification|
Speaker diarization
| Speaker verification |
|Speaker identification|
[
Speaker diarization
][
sd-url
]
| Speaker verification |
|----------------------|-------------------- |------------------------|
| ✔️ | ✔️ | ✔️ |
|
Spoken Language identification | Audio tagging | Voice activity detection
|
|
[
Spoken Language identification
][
slid-url
]
|
[
Audio tagging
][
at-url
]
|
[
Voice activity detection
][
vad-url
]
|
|--------------------------------|---------------|--------------------------|
| ✔️ | ✔️ | ✔️ |
|
Keyword spotting | Add punctuation | Speech enhancement
|
|
[
Keyword spotting
][
kws-url
]
|
[
Add punctuation
][
punct-url
]
|
[
Speech enhancement
][
se-url
]
|
|------------------|-----------------|--------------------|
| ✔️ | ✔️ | ✔️ |
...
...
@@ -501,3 +501,12 @@ It uses sherpa-onnx for speech-to-text and text-to-speech.
[
spleeter
]:
https://github.com/deezer/spleeter
[
UVR
]:
https://github.com/Anjok07/ultimatevocalremovergui
[
gtcrn
]:
https://github.com/Xiaobin-Rong/gtcrn
[
tts-url
]:
https://k2-fsa.github.io/sherpa/onnx/tts/all-in-one.html
[
ss-url
]:
https://k2-fsa.github.io/sherpa/onnx/source-separation/index.html
[
sd-url
]:
https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/index.html
[
slid-url
]:
https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/index.html
[
at-url
]:
https://k2-fsa.github.io/sherpa/onnx/audio-tagging/index.html
[
vad-url
]:
https://k2-fsa.github.io/sherpa/onnx/vad/index.html
[
kws-url
]:
https://k2-fsa.github.io/sherpa/onnx/kws/index.html
[
punct-url
]:
https://k2-fsa.github.io/sherpa/onnx/punctuation/index.html
[
se-url
]:
https://k2-fsa.github.io/sherpa/onnx/speech-enhancment/index.html
...
...
flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
查看文件 @
6982b86
...
...
@@ -201,6 +201,7 @@ final class SherpaOnnxOfflineTtsKokoroModelConfig extends Struct {
external
double
lengthScale
;
external
Pointer
<
Utf8
>
dictDir
;
external
Pointer
<
Utf8
>
lexicon
;
external
Pointer
<
Utf8
>
lang
;
}
final
class
SherpaOnnxOfflineTtsModelConfig
extends
Struct
{
...
...
flutter/sherpa_onnx/lib/src/tts.dart
查看文件 @
6982b86
...
...
@@ -117,6 +117,7 @@ class OfflineTtsKokoroModelConfig {
this
.
lengthScale
=
1.0
,
this
.
dictDir
=
''
,
this
.
lexicon
=
''
,
this
.
lang
=
''
,
});
factory
OfflineTtsKokoroModelConfig
.
fromJson
(
Map
<
String
,
dynamic
>
json
)
{
...
...
@@ -128,12 +129,13 @@ class OfflineTtsKokoroModelConfig {
lengthScale:
(
json
[
'lengthScale'
]
as
num
?)?.
toDouble
()
??
1.0
,
dictDir:
json
[
'dictDir'
]
as
String
?
??
''
,
lexicon:
json
[
'lexicon'
]
as
String
?
??
''
,
lang:
json
[
'lang'
]
as
String
?
??
''
,
);
}
@override
String
toString
()
{
return
'OfflineTtsKokoroModelConfig(model:
$model
, voices:
$voices
, tokens:
$tokens
, dataDir:
$dataDir
, lengthScale:
$lengthScale
, dictDir:
$dictDir
, lexicon:
$lexicon
)'
;
return
'OfflineTtsKokoroModelConfig(model:
$model
, voices:
$voices
, tokens:
$tokens
, dataDir:
$dataDir
, lengthScale:
$lengthScale
, dictDir:
$dictDir
, lexicon:
$lexicon
, lang:
$lang
)'
;
}
Map
<
String
,
dynamic
>
toJson
()
=>
{
...
...
@@ -144,6 +146,7 @@ class OfflineTtsKokoroModelConfig {
'lengthScale'
:
lengthScale
,
'dictDir'
:
dictDir
,
'lexicon'
:
lexicon
,
'lang'
:
lang
,
};
final
String
model
;
...
...
@@ -153,6 +156,7 @@ class OfflineTtsKokoroModelConfig {
final
double
lengthScale
;
final
String
dictDir
;
final
String
lexicon
;
final
String
lang
;
}
class
OfflineTtsModelConfig
{
...
...
@@ -286,6 +290,7 @@ class OfflineTts {
c
.
ref
.
model
.
kokoro
.
lengthScale
=
config
.
model
.
kokoro
.
lengthScale
;
c
.
ref
.
model
.
kokoro
.
dictDir
=
config
.
model
.
kokoro
.
dictDir
.
toNativeUtf8
();
c
.
ref
.
model
.
kokoro
.
lexicon
=
config
.
model
.
kokoro
.
lexicon
.
toNativeUtf8
();
c
.
ref
.
model
.
kokoro
.
lang
=
config
.
model
.
kokoro
.
lang
.
toNativeUtf8
();
c
.
ref
.
model
.
numThreads
=
config
.
model
.
numThreads
;
c
.
ref
.
model
.
debug
=
config
.
model
.
debug
?
1
:
0
;
...
...
@@ -302,6 +307,7 @@ class OfflineTts {
calloc
.
free
(
c
.
ref
.
ruleFsts
);
calloc
.
free
(
c
.
ref
.
model
.
provider
);
calloc
.
free
(
c
.
ref
.
model
.
kokoro
.
lang
);
calloc
.
free
(
c
.
ref
.
model
.
kokoro
.
lexicon
);
calloc
.
free
(
c
.
ref
.
model
.
kokoro
.
dictDir
);
calloc
.
free
(
c
.
ref
.
model
.
kokoro
.
dataDir
);
...
...
harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc
查看文件 @
6982b86
...
...
@@ -70,6 +70,7 @@ static SherpaOnnxOfflineTtsKokoroModelConfig GetOfflineTtsKokoroModelConfig(
SHERPA_ONNX_ASSIGN_ATTR_FLOAT
(
length_scale
,
lengthScale
);
SHERPA_ONNX_ASSIGN_ATTR_STR
(
dict_dir
,
dictDir
);
SHERPA_ONNX_ASSIGN_ATTR_STR
(
lexicon
,
lexicon
);
SHERPA_ONNX_ASSIGN_ATTR_STR
(
lang
,
lang
);
return
c
;
}
...
...
@@ -177,6 +178,7 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper(
SHERPA_ONNX_DELETE_C_STR
(
c
.
model
.
kokoro
.
data_dir
);
SHERPA_ONNX_DELETE_C_STR
(
c
.
model
.
kokoro
.
dict_dir
);
SHERPA_ONNX_DELETE_C_STR
(
c
.
model
.
kokoro
.
lexicon
);
SHERPA_ONNX_DELETE_C_STR
(
c
.
model
.
kokoro
.
lang
);
SHERPA_ONNX_DELETE_C_STR
(
c
.
model
.
provider
);
...
...
harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets
查看文件 @
6982b86
...
...
@@ -36,6 +36,7 @@ export class OfflineTtsKokoroModelConfig {
public lengthScale: number = 1.0;
public dictDir: string = '';
public lexicon: string = '';
public lang: string = '';
}
export class OfflineTtsModelConfig {
...
...
scripts/dotnet/OfflineTtsKokoroModelConfig.cs
查看文件 @
6982b86
...
...
@@ -18,6 +18,7 @@ namespace SherpaOnnx
DictDir
=
""
;
Lexicon
=
""
;
Lang
=
""
;
}
[
MarshalAs
(
UnmanagedType
.
LPStr
)]
public
string
Model
;
...
...
@@ -38,5 +39,8 @@ namespace SherpaOnnx
[
MarshalAs
(
UnmanagedType
.
LPStr
)]
public
string
Lexicon
;
[
MarshalAs
(
UnmanagedType
.
LPStr
)]
public
string
Lang
;
}
}
...
...
scripts/go/sherpa_onnx.go
查看文件 @
6982b86
...
...
@@ -857,6 +857,7 @@ type OfflineTtsKokoroModelConfig struct {
DataDir
string
// Path to espeak-ng-data directory
DictDir
string
// Path to dict directory
Lexicon
string
// Path to lexicon files
Lang
string
// Example: es for Spanish, fr-fr for French. Can be empty
LengthScale
float32
// Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
}
...
...
@@ -1006,6 +1007,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
c
.
model
.
kokoro
.
lexicon
=
C
.
CString
(
config
.
Model
.
Kokoro
.
Lexicon
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
model
.
kokoro
.
lexicon
))
c
.
model
.
kokoro
.
lang
=
C
.
CString
(
config
.
Model
.
Kokoro
.
Lang
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
model
.
kokoro
.
lang
))
c
.
model
.
kokoro
.
length_scale
=
C
.
float
(
config
.
Model
.
Kokoro
.
LengthScale
)
c
.
model
.
num_threads
=
C
.
int
(
config
.
Model
.
NumThreads
)
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
6982b86
...
...
@@ -1164,6 +1164,7 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
SHERPA_ONNX_OR
(
config
->
model
.
kokoro
.
dict_dir
,
""
);
tts_config
.
model
.
kokoro
.
lexicon
=
SHERPA_ONNX_OR
(
config
->
model
.
kokoro
.
lexicon
,
""
);
tts_config
.
model
.
kokoro
.
lang
=
SHERPA_ONNX_OR
(
config
->
model
.
kokoro
.
lang
,
""
);
tts_config
.
model
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model
.
num_threads
,
1
);
tts_config
.
model
.
debug
=
config
->
model
.
debug
;
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
6982b86
...
...
@@ -958,6 +958,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig {
float
length_scale
;
// < 1, faster in speech speed; > 1, slower in speed
const
char
*
dict_dir
;
const
char
*
lexicon
;
const
char
*
lang
;
}
SherpaOnnxOfflineTtsKokoroModelConfig
;
SHERPA_ONNX_API
typedef
struct
SherpaOnnxOfflineTtsModelConfig
{
...
...
sherpa-onnx/c-api/cxx-api.cc
查看文件 @
6982b86
...
...
@@ -366,6 +366,7 @@ OfflineTts OfflineTts::Create(const OfflineTtsConfig &config) {
c
.
model
.
kokoro
.
length_scale
=
config
.
model
.
kokoro
.
length_scale
;
c
.
model
.
kokoro
.
dict_dir
=
config
.
model
.
kokoro
.
dict_dir
.
c_str
();
c
.
model
.
kokoro
.
lexicon
=
config
.
model
.
kokoro
.
lexicon
.
c_str
();
c
.
model
.
kokoro
.
lang
=
config
.
model
.
kokoro
.
lang
.
c_str
();
c
.
model
.
num_threads
=
config
.
model
.
num_threads
;
c
.
model
.
debug
=
config
.
model
.
debug
;
...
...
sherpa-onnx/c-api/cxx-api.h
查看文件 @
6982b86
...
...
@@ -367,6 +367,7 @@ struct OfflineTtsKokoroModelConfig {
std
::
string
data_dir
;
std
::
string
dict_dir
;
std
::
string
lexicon
;
std
::
string
lang
;
float
length_scale
=
1
.
0
;
// < 1, faster in speed; > 1, slower in speed
};
...
...
sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc
查看文件 @
6982b86
...
...
@@ -67,7 +67,8 @@ class KokoroMultiLangLexicon::Impl {
InitEspeak
(
data_dir
);
// See ./piper-phonemize-lexicon.cc
}
std
::
vector
<
TokenIDs
>
ConvertTextToTokenIds
(
const
std
::
string
&
_text
)
const
{
std
::
vector
<
TokenIDs
>
ConvertTextToTokenIds
(
const
std
::
string
&
_text
,
const
std
::
string
&
voice
)
const
{
std
::
string
text
=
ToLowerCase
(
_text
);
if
(
debug_
)
{
SHERPA_ONNX_LOGE
(
"After converting to lowercase:
\n
%s"
,
text
.
c_str
());
...
...
@@ -124,7 +125,7 @@ class KokoroMultiLangLexicon::Impl {
SHERPA_ONNX_LOGE
(
"Non-Chinese: %s"
,
ms
.
c_str
());
}
ids_vec
=
Convert
EnglishToTokenIDs
(
ms
,
meta_data_
.
voice
);
ids_vec
=
Convert
NonChineseToTokenIDs
(
ms
,
voice
);
}
for
(
const
auto
&
ids
:
ids_vec
)
{
...
...
@@ -255,8 +256,30 @@ class KokoroMultiLangLexicon::Impl {
return
ans
;
}
std
::
vector
<
std
::
vector
<
int32_t
>>
Convert
EnglishToTokenIDs
(
std
::
vector
<
std
::
vector
<
int32_t
>>
Convert
TextToTokenIDsWithEspeak
(
const
std
::
string
&
text
,
const
std
::
string
&
voice
)
const
{
auto
temp
=
ConvertTextToTokenIdsKokoro
(
phoneme2id_
,
meta_data_
.
max_token_len
,
text
,
voice
);
std
::
vector
<
std
::
vector
<
int32_t
>>
ans
;
ans
.
reserve
(
temp
.
size
());
for
(
const
auto
&
i
:
temp
)
{
ans
.
emplace_back
(
i
.
tokens
.
begin
(),
i
.
tokens
.
end
());
}
return
ans
;
}
std
::
vector
<
std
::
vector
<
int32_t
>>
ConvertNonChineseToTokenIDs
(
const
std
::
string
&
text
,
const
std
::
string
&
voice
)
const
{
if
(
!
voice
.
empty
())
{
return
ConvertTextToTokenIDsWithEspeak
(
text
,
voice
);
}
// If voice is empty, we split the text into words and use the lexicon
// to lookup the pronunciation of each word, fallback to espeak if
// a word is not in the lexicon.
std
::
vector
<
std
::
string
>
words
=
SplitUtf8
(
text
);
if
(
debug_
)
{
std
::
ostringstream
os
;
...
...
@@ -317,7 +340,7 @@ class KokoroMultiLangLexicon::Impl {
piper
::
eSpeakPhonemeConfig
config
;
config
.
voice
=
voice
;
config
.
voice
=
meta_data_
.
voice
;
std
::
vector
<
std
::
vector
<
piper
::
Phoneme
>>
phonemes
;
...
...
@@ -391,9 +414,28 @@ class KokoroMultiLangLexicon::Impl {
void
InitTokens
(
std
::
istream
&
is
)
{
token2id_
=
ReadTokens
(
is
);
// defined in ./symbol-table.cc
std
::
wstring_convert
<
std
::
codecvt_utf8
<
char32_t
>
,
char32_t
>
conv
;
std
::
u32string
s
;
for
(
const
auto
&
p
:
token2id_
)
{
s
=
conv
.
from_bytes
(
p
.
first
);
if
(
s
.
size
()
!=
1
)
{
SHERPA_ONNX_LOGE
(
"Error for token %s with id %d"
,
p
.
first
.
c_str
(),
p
.
second
);
SHERPA_ONNX_EXIT
(
-
1
);
}
char32_t
c
=
s
[
0
];
phoneme2id_
.
insert
({
c
,
p
.
second
});
}
}
void
InitLexicon
(
const
std
::
string
&
lexicon
)
{
if
(
lexicon
.
empty
())
{
return
;
}
std
::
vector
<
std
::
string
>
files
;
SplitStringToVector
(
lexicon
,
","
,
false
,
&
files
);
for
(
const
auto
&
f
:
files
)
{
...
...
@@ -404,6 +446,10 @@ class KokoroMultiLangLexicon::Impl {
template
<
typename
Manager
>
void
InitLexicon
(
Manager
*
mgr
,
const
std
::
string
&
lexicon
)
{
if
(
lexicon
.
empty
())
{
return
;
}
std
::
vector
<
std
::
string
>
files
;
SplitStringToVector
(
lexicon
,
","
,
false
,
&
files
);
for
(
const
auto
&
f
:
files
)
{
...
...
@@ -445,7 +491,7 @@ class KokoroMultiLangLexicon::Impl {
std
::
vector
<
int32_t
>
ids
=
ConvertTokensToIds
(
token2id_
,
token_list
);
if
(
ids
.
empty
())
{
if
(
ids
.
empty
()
&&
word
!=
"呣"
)
{
SHERPA_ONNX_LOGE
(
"Invalid pronunciation for word '%s' at line %d:%s. Ignore it"
,
word
.
c_str
(),
line_num
,
line
.
c_str
());
...
...
@@ -465,6 +511,8 @@ class KokoroMultiLangLexicon::Impl {
// tokens.txt is saved in token2id_
std
::
unordered_map
<
std
::
string
,
int32_t
>
token2id_
;
std
::
unordered_map
<
char32_t
,
int32_t
>
phoneme2id_
;
std
::
unique_ptr
<
cppjieba
::
Jieba
>
jieba_
;
bool
debug_
=
false
;
};
...
...
@@ -487,8 +535,8 @@ KokoroMultiLangLexicon::KokoroMultiLangLexicon(
meta_data
,
debug
))
{}
std
::
vector
<
TokenIDs
>
KokoroMultiLangLexicon
::
ConvertTextToTokenIds
(
const
std
::
string
&
text
,
const
std
::
string
&
/*unused_voice = ""*/
)
const
{
return
impl_
->
ConvertTextToTokenIds
(
text
);
const
std
::
string
&
text
,
const
std
::
string
&
voice
/*= ""*/
)
const
{
return
impl_
->
ConvertTextToTokenIds
(
text
,
voice
);
}
#if __ANDROID_API__ >= 9
...
...
sherpa-onnx/csrc/offline-speech-denoiser-model-config.h
查看文件 @
6982b86
...
...
@@ -20,9 +20,9 @@ struct OfflineSpeechDenoiserModelConfig {
OfflineSpeechDenoiserModelConfig
()
=
default
;
OfflineSpeechDenoiserModelConfig
(
OfflineSpeechDenoiserGtcrnModelConfig
gtcrn
,
int32_t
num_threads
,
bool
debug
,
const
std
::
string
&
provider
)
OfflineSpeechDenoiserModelConfig
(
const
OfflineSpeechDenoiserGtcrnModelConfig
&
gtcrn
,
int32_t
num_threads
,
bool
debug
,
const
std
::
string
&
provider
)
:
gtcrn
(
gtcrn
),
num_threads
(
num_threads
),
debug
(
debug
),
...
...
sherpa-onnx/csrc/offline-tts-frontend.h
查看文件 @
6982b86
...
...
@@ -6,6 +6,7 @@
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_
#include <cstdint>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
...
...
@@ -57,6 +58,12 @@ class OfflineTtsFrontend {
// implementation is in ./piper-phonemize-lexicon.cc
void
InitEspeak
(
const
std
::
string
&
data_dir
);
// implementation in ./piper-phonemize-lexicon.cc
std
::
vector
<
TokenIDs
>
ConvertTextToTokenIdsKokoro
(
const
std
::
unordered_map
<
char32_t
,
int32_t
>
&
token2id
,
int32_t
max_token_len
,
const
std
::
string
&
text
,
const
std
::
string
&
voice
=
""
);
}
// namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_OFFLINE_TTS_FRONTEND_H_
...
...
sherpa-onnx/csrc/offline-tts-kokoro-impl.h
查看文件 @
6982b86
...
...
@@ -220,8 +220,9 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl {
}
}
std
::
vector
<
TokenIDs
>
token_ids
=
frontend_
->
ConvertTextToTokenIds
(
text
,
meta_data
.
voice
);
std
::
vector
<
TokenIDs
>
token_ids
=
frontend_
->
ConvertTextToTokenIds
(
text
,
config_
.
model
.
kokoro
.
lang
.
empty
()
?
meta_data
.
voice
:
config_
.
model
.
kokoro
.
lang
);
if
(
token_ids
.
empty
()
||
(
token_ids
.
size
()
==
1
&&
token_ids
[
0
].
tokens
.
empty
()))
{
...
...
@@ -335,12 +336,14 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl {
if
(
meta_data
.
version
>=
2
)
{
// this is a multi-lingual model, we require that you pass dict_dir
// and at least one of lexicon or lang
if
(
config_
.
model
.
kokoro
.
lexicon
.
empty
()
||
if
((
config_
.
model
.
kokoro
.
lexicon
.
empty
()
&&
config_
.
model
.
kokoro
.
lang
.
empty
())
||
config_
.
model
.
kokoro
.
dict_dir
.
empty
())
{
SHERPA_ONNX_LOGE
(
"Current model version: '%d'"
,
meta_data
.
version
);
SHERPA_ONNX_LOGE
(
"You are using a multi-lingual Kokoro model (e.g., Kokoro >= "
"v1.0). please pass --kokoro-lexicon and --kokoro-dict-dir"
);
"v1.0). Please pass --kokoro-lexicon and --kokoro-dict-dir or "
"provide --kokoro-lang and --kokoro-dict-dir"
);
SHERPA_ONNX_EXIT
(
-
1
);
}
...
...
@@ -362,7 +365,8 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl {
if
(
meta_data
.
version
>=
2
)
{
// this is a multi-lingual model, we require that you pass dict_dir
// and at least one of lexicon or lang
if
(
config_
.
model
.
kokoro
.
lexicon
.
empty
()
||
if
((
config_
.
model
.
kokoro
.
lexicon
.
empty
()
&&
config_
.
model
.
kokoro
.
lang
.
empty
())
||
config_
.
model
.
kokoro
.
dict_dir
.
empty
())
{
SHERPA_ONNX_LOGE
(
"Current model version: '%d'"
,
meta_data
.
version
);
SHERPA_ONNX_LOGE
(
...
...
sherpa-onnx/csrc/offline-tts-kokoro-model-config.cc
查看文件 @
6982b86
...
...
@@ -18,6 +18,13 @@ void OfflineTtsKokoroModelConfig::Register(ParseOptions *po) {
"Path to voices.bin for Kokoro models"
);
po
->
Register
(
"kokoro-tokens"
,
&
tokens
,
"Path to tokens.txt for Kokoro models"
);
po
->
Register
(
"kokoro-lang"
,
&
lang
,
"Used only by kokoro >= 1.0. Example values: "
"en (English), "
"es (Spanish), fr (French), hi (Hindi), it (Italian), "
"pt-br (Brazilian Portuguese). "
"You can leave it empty, in which case you need to provide "
"--kokoro-lexicon."
);
po
->
Register
(
"kokoro-lexicon"
,
&
lexicon
,
"Path to lexicon.txt for Kokoro models. Used only for Kokoro >= v1.0"
...
...
@@ -127,7 +134,8 @@ std::string OfflineTtsKokoroModelConfig::ToString() const {
os
<<
"lexicon=
\"
"
<<
lexicon
<<
"
\"
, "
;
os
<<
"data_dir=
\"
"
<<
data_dir
<<
"
\"
, "
;
os
<<
"dict_dir=
\"
"
<<
dict_dir
<<
"
\"
, "
;
os
<<
"length_scale="
<<
length_scale
<<
")"
;
os
<<
"length_scale="
<<
length_scale
<<
", "
;
os
<<
"lang=
\"
"
<<
lang
<<
"
\"
)"
;
return
os
.
str
();
}
...
...
sherpa-onnx/csrc/offline-tts-kokoro-model-config.h
查看文件 @
6982b86
...
...
@@ -27,6 +27,13 @@ struct OfflineTtsKokoroModelConfig {
// speed = 1 / length_scale
float
length_scale
=
1
.
0
;
// Used only for Kokoro >= 1.0.
//
// If it is not empty, meta_data.voice is ignored.
// Example values: es (Spanish), fr (French), pt (Portuguese)
// See https://hf-mirror.com/hexgrad/Kokoro-82M/blob/main/VOICES.md
std
::
string
lang
;
OfflineTtsKokoroModelConfig
()
=
default
;
OfflineTtsKokoroModelConfig
(
const
std
::
string
&
model
,
...
...
@@ -34,14 +41,16 @@ struct OfflineTtsKokoroModelConfig {
const
std
::
string
&
tokens
,
const
std
::
string
&
lexicon
,
const
std
::
string
&
data_dir
,
const
std
::
string
&
dict_dir
,
float
length_scale
)
const
std
::
string
&
dict_dir
,
float
length_scale
,
const
std
::
string
&
lang
)
:
model
(
model
),
voices
(
voices
),
tokens
(
tokens
),
lexicon
(
lexicon
),
data_dir
(
data_dir
),
dict_dir
(
dict_dir
),
length_scale
(
length_scale
)
{}
length_scale
(
length_scale
),
lang
(
lang
)
{}
void
Register
(
ParseOptions
*
po
);
bool
Validate
()
const
;
...
...
sherpa-onnx/csrc/piper-phonemize-lexicon.cc
查看文件 @
6982b86
...
...
@@ -351,7 +351,8 @@ std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds(
if
(
is_matcha_
)
{
return
ConvertTextToTokenIdsMatcha
(
text
,
voice
);
}
else
if
(
is_kokoro_
)
{
return
ConvertTextToTokenIdsKokoro
(
text
,
voice
);
return
ConvertTextToTokenIdsKokoro
(
token2id_
,
kokoro_meta_data_
.
max_token_len
,
text
,
voice
);
}
else
{
return
ConvertTextToTokenIdsVits
(
text
,
voice
);
}
...
...
@@ -382,8 +383,10 @@ std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIdsMatcha(
return
ans
;
}
std
::
vector
<
TokenIDs
>
PiperPhonemizeLexicon
::
ConvertTextToTokenIdsKokoro
(
const
std
::
string
&
text
,
const
std
::
string
&
voice
/*= ""*/
)
const
{
std
::
vector
<
TokenIDs
>
ConvertTextToTokenIdsKokoro
(
const
std
::
unordered_map
<
char32_t
,
int32_t
>
&
token2id
,
int32_t
max_token_len
,
const
std
::
string
&
text
,
const
std
::
string
&
voice
/*= ""*/
)
{
piper
::
eSpeakPhonemeConfig
config
;
// ./bin/espeak-ng-bin --path ./install/share/espeak-ng-data/ --voices
...
...
@@ -397,8 +400,7 @@ std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIdsKokoro(
std
::
vector
<
TokenIDs
>
ans
;
for
(
const
auto
&
p
:
phonemes
)
{
auto
phoneme_ids
=
PiperPhonemesToIdsKokoro
(
token2id_
,
p
,
kokoro_meta_data_
.
max_token_len
);
auto
phoneme_ids
=
PiperPhonemesToIdsKokoro
(
token2id
,
p
,
max_token_len
);
for
(
auto
&
ids
:
phoneme_ids
)
{
ans
.
emplace_back
(
std
::
move
(
ids
));
...
...
sherpa-onnx/csrc/piper-phonemize-lexicon.h
查看文件 @
6982b86
...
...
@@ -52,9 +52,6 @@ class PiperPhonemizeLexicon : public OfflineTtsFrontend {
std
::
vector
<
TokenIDs
>
ConvertTextToTokenIdsMatcha
(
const
std
::
string
&
text
,
const
std
::
string
&
voice
=
""
)
const
;
std
::
vector
<
TokenIDs
>
ConvertTextToTokenIdsKokoro
(
const
std
::
string
&
text
,
const
std
::
string
&
voice
=
""
)
const
;
private
:
// map unicode codepoint to an integer ID
std
::
unordered_map
<
char32_t
,
int32_t
>
token2id_
;
...
...
sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig.java
查看文件 @
6982b86
...
...
@@ -6,6 +6,7 @@ public class OfflineTtsKokoroModelConfig {
private
final
String
voices
;
private
final
String
tokens
;
private
final
String
lexicon
;
private
final
String
lang
;
private
final
String
dataDir
;
private
final
String
dictDir
;
private
final
float
lengthScale
;
...
...
@@ -15,6 +16,7 @@ public class OfflineTtsKokoroModelConfig {
this
.
voices
=
builder
.
voices
;
this
.
tokens
=
builder
.
tokens
;
this
.
lexicon
=
builder
.
lexicon
;
this
.
lang
=
builder
.
lang
;
this
.
dataDir
=
builder
.
dataDir
;
this
.
dictDir
=
builder
.
dictDir
;
this
.
lengthScale
=
builder
.
lengthScale
;
...
...
@@ -50,6 +52,7 @@ public class OfflineTtsKokoroModelConfig {
private
String
voices
=
""
;
private
String
tokens
=
""
;
private
String
lexicon
=
""
;
private
String
lang
=
""
;
private
String
dataDir
=
""
;
private
String
dictDir
=
""
;
private
float
lengthScale
=
1.0f
;
...
...
@@ -78,6 +81,11 @@ public class OfflineTtsKokoroModelConfig {
return
this
;
}
public
Builder
setLang
(
String
lang
)
{
this
.
lang
=
lang
;
return
this
;
}
public
Builder
setDataDir
(
String
dataDir
)
{
this
.
dataDir
=
dataDir
;
return
this
;
...
...
sherpa-onnx/jni/offline-tts.cc
查看文件 @
6982b86
...
...
@@ -145,6 +145,12 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
ans
.
model
.
kokoro
.
lexicon
=
p
;
env
->
ReleaseStringUTFChars
(
s
,
p
);
fid
=
env
->
GetFieldID
(
kokoro_cls
,
"lang"
,
"Ljava/lang/String;"
);
s
=
(
jstring
)
env
->
GetObjectField
(
kokoro
,
fid
);
p
=
env
->
GetStringUTFChars
(
s
,
nullptr
);
ans
.
model
.
kokoro
.
lang
=
p
;
env
->
ReleaseStringUTFChars
(
s
,
p
);
fid
=
env
->
GetFieldID
(
kokoro_cls
,
"dataDir"
,
"Ljava/lang/String;"
);
s
=
(
jstring
)
env
->
GetObjectField
(
kokoro
,
fid
);
p
=
env
->
GetStringUTFChars
(
s
,
nullptr
);
...
...
sherpa-onnx/kotlin-api/Tts.kt
查看文件 @
6982b86
...
...
@@ -31,6 +31,7 @@ data class OfflineTtsKokoroModelConfig(
var tokens: String = "",
var dataDir: String = "",
var lexicon: String = "",
var lang: String = "",
var dictDir: String = "",
var lengthScale: Float = 1.0f,
)
...
...
sherpa-onnx/pascal-api/sherpa_onnx.pas
查看文件 @
6982b86
...
...
@@ -84,6 +84,7 @@ type
LengthScale:
Single;
DictDir:
AnsiString;
Lexicon:
AnsiString;
Lang:
AnsiString;
function
ToString:
AnsiString;
class
operator
Initialize(
{
$IFDEF
FPC
}
var
{
$ELSE
}
out
{
$ENDIF
}
Dest:
TSherpaOnnxOfflineTtsKokoroModelConfig);
...
...
@@ -841,6 +842,7 @@ type
LengthScale:
cfloat;
DictDir:
PAnsiChar;
Lexicon:
PAnsiChar;
Lang:
PAnsiChar;
end;
SherpaOnnxOfflineTtsModelConfig
=
record
...
...
@@ -2096,10 +2098,11 @@ begin
'DataDir
:=
%s,
'
+
'LengthScale
:=
%.
2
f,
'
+
'DictDir
:=
%s,
'
+
'Lexicon
:=
%s'
+
'Lexicon
:=
%s,
'
+
'Lang
:=
%s'
+
')',
[
Self.Model
,
Self.Voices
,
Self.Tokens
,
Self.DataDir
,
Self.LengthScale
,
Self.DictDir
,
Self.Lexicon
]
);
Self.DictDir
,
Self.Lexicon
,
Self.Lang
]
);
end;
class
operator
TSherpaOnnxOfflineTtsKokoroModelConfig.Initialize(
{
$IFDEF
FPC
}
var
{
$ELSE
}
out
{
$ENDIF
}
Dest:
TSherpaOnnxOfflineTtsKokoroModelConfig);
...
...
@@ -2180,6 +2183,7 @@ begin
C.Model.Kokoro.LengthScale
:=
Config.Model.Kokoro.LengthScale;
C.Model.Kokoro.DictDir
:=
PAnsiChar(Config.Model.Kokoro.DictDir);
C.Model.Kokoro.Lexicon
:=
PAnsiChar(Config.Model.Kokoro.Lexicon);
C.Model.Kokoro.Lang
:=
PAnsiChar(Config.Model.Kokoro.Lang);
C.Model.NumThreads
:=
Config.Model.NumThreads;
C.Model.Provider
:=
PAnsiChar(Config.Model.Provider);
...
...
sherpa-onnx/python/csrc/offline-tts-kokoro-model-config.cc
查看文件 @
6982b86
...
...
@@ -17,10 +17,12 @@ void PybindOfflineTtsKokoroModelConfig(py::module *m) {
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
<
const
std
::
string
&
,
const
std
::
string
&
,
const
std
::
string
&
,
const
std
::
string
&
,
const
std
::
string
&
,
const
std
::
string
&
,
float
>
(),
const
std
::
string
&
,
const
std
::
string
&
,
float
,
const
std
::
string
&>
(),
py
::
arg
(
"model"
),
py
::
arg
(
"voices"
),
py
::
arg
(
"tokens"
),
py
::
arg
(
"lexicon"
)
=
""
,
py
::
arg
(
"data_dir"
),
py
::
arg
(
"dict_dir"
)
=
""
,
py
::
arg
(
"length_scale"
)
=
1.0
)
py
::
arg
(
"dict_dir"
)
=
""
,
py
::
arg
(
"length_scale"
)
=
1.0
,
py
::
arg
(
"lang"
)
=
""
)
.
def_readwrite
(
"model"
,
&
PyClass
::
model
)
.
def_readwrite
(
"voices"
,
&
PyClass
::
voices
)
.
def_readwrite
(
"tokens"
,
&
PyClass
::
tokens
)
...
...
@@ -28,6 +30,7 @@ void PybindOfflineTtsKokoroModelConfig(py::module *m) {
.
def_readwrite
(
"data_dir"
,
&
PyClass
::
data_dir
)
.
def_readwrite
(
"dict_dir"
,
&
PyClass
::
dict_dir
)
.
def_readwrite
(
"length_scale"
,
&
PyClass
::
length_scale
)
.
def_readwrite
(
"lang"
,
&
PyClass
::
lang
)
.
def
(
"__str__"
,
&
PyClass
::
ToString
)
.
def
(
"validate"
,
&
PyClass
::
Validate
);
}
...
...
swift-api-examples/SherpaOnnx.swift
查看文件 @
6982b86
...
...
@@ -806,7 +806,8 @@ func sherpaOnnxOfflineTtsKokoroModelConfig(
dataDir
:
String
=
""
,
lengthScale
:
Float
=
1.0
,
dictDir
:
String
=
""
,
lexicon
:
String
=
""
lexicon
:
String
=
""
,
lang
:
String
=
""
)
->
SherpaOnnxOfflineTtsKokoroModelConfig
{
return
SherpaOnnxOfflineTtsKokoroModelConfig
(
model
:
toCPointer
(
model
),
...
...
@@ -815,7 +816,8 @@ func sherpaOnnxOfflineTtsKokoroModelConfig(
data_dir
:
toCPointer
(
dataDir
),
length_scale
:
lengthScale
,
dict_dir
:
toCPointer
(
dictDir
),
lexicon
:
toCPointer
(
lexicon
)
lexicon
:
toCPointer
(
lexicon
),
lang
:
toCPointer
(
lang
)
)
}
...
...
wasm/tts/sherpa-onnx-tts.js
查看文件 @
6982b86
...
...
@@ -143,13 +143,14 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
const
dataDirLen
=
Module
.
lengthBytesUTF8
(
config
.
dataDir
||
''
)
+
1
;
const
dictDirLen
=
Module
.
lengthBytesUTF8
(
config
.
dictDir
||
''
)
+
1
;
const
lexiconLen
=
Module
.
lengthBytesUTF8
(
config
.
lexicon
||
''
)
+
1
;
const
langLen
=
Module
.
lengthBytesUTF8
(
config
.
lang
||
''
)
+
1
;
const
n
=
modelLen
+
voicesLen
+
tokensLen
+
dataDirLen
+
dictDirLen
+
lexiconLen
;
const
n
=
modelLen
+
voicesLen
+
tokensLen
+
dataDirLen
+
dictDirLen
+
lexiconLen
+
langLen
;
const
buffer
=
Module
.
_malloc
(
n
);
const
len
=
7
*
4
;
const
len
=
8
*
4
;
const
ptr
=
Module
.
_malloc
(
len
);
let
offset
=
0
;
...
...
@@ -171,6 +172,9 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
Module
.
stringToUTF8
(
config
.
lexicon
||
''
,
buffer
+
offset
,
lexiconLen
);
offset
+=
lexiconLen
;
Module
.
stringToUTF8
(
config
.
lang
||
''
,
buffer
+
offset
,
langLen
);
offset
+=
langLen
;
offset
=
0
;
Module
.
setValue
(
ptr
,
buffer
+
offset
,
'i8*'
);
offset
+=
modelLen
;
...
...
@@ -192,6 +196,9 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
Module
.
setValue
(
ptr
+
24
,
buffer
+
offset
,
'i8*'
);
offset
+=
lexiconLen
;
Module
.
setValue
(
ptr
+
28
,
buffer
+
offset
,
'i8*'
);
offset
+=
langLen
;
return
{
buffer
:
buffer
,
ptr
:
ptr
,
len
:
len
,
}
...
...
@@ -233,6 +240,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
dataDir
:
''
,
dictDir
:
''
,
lexicon
:
''
,
lang
:
''
,
};
}
...
...
wasm/tts/sherpa-onnx-wasm-main-tts.cc
查看文件 @
6982b86
...
...
@@ -15,7 +15,7 @@ extern "C" {
static_assert
(
sizeof
(
SherpaOnnxOfflineTtsVitsModelConfig
)
==
8
*
4
,
""
);
static_assert
(
sizeof
(
SherpaOnnxOfflineTtsMatchaModelConfig
)
==
8
*
4
,
""
);
static_assert
(
sizeof
(
SherpaOnnxOfflineTtsKokoroModelConfig
)
==
7
*
4
,
""
);
static_assert
(
sizeof
(
SherpaOnnxOfflineTtsKokoroModelConfig
)
==
8
*
4
,
""
);
static_assert
(
sizeof
(
SherpaOnnxOfflineTtsModelConfig
)
==
sizeof
(
SherpaOnnxOfflineTtsVitsModelConfig
)
+
sizeof
(
SherpaOnnxOfflineTtsMatchaModelConfig
)
+
...
...
请
注册
或
登录
后发表评论