Fangjun Kuang
Committed by GitHub

Support Matcha-TTS models using espeak-ng (#1672)

@@ -19,6 +19,28 @@ which $EXE @@ -19,6 +19,28 @@ which $EXE
19 mkdir ./tts 19 mkdir ./tts
20 20
21 log "------------------------------------------------------------" 21 log "------------------------------------------------------------"
  22 +log "matcha-icefall-en_US-ljspeech"
  23 +log "------------------------------------------------------------"
  24 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  25 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  26 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  27 +
  28 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  29 +
  30 +$EXE \
  31 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  32 + --matcha-vocoder=./hifigan_v2.onnx \
  33 + --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  34 + --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  35 + --num-threads=2 \
  36 + --output-filename=./tts/matcha-ljspeech-1.wav \
  37 + --debug=1 \
  38 + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
  39 +
  40 +rm hifigan_v2.onnx
  41 +rm -rf matcha-icefall-en_US-ljspeech
  42 +
  43 +log "------------------------------------------------------------"
22 log "matcha-icefall-zh-baker" 44 log "matcha-icefall-zh-baker"
23 log "------------------------------------------------------------" 45 log "------------------------------------------------------------"
24 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 46 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
@@ -267,7 +267,27 @@ log "Offline TTS test" @@ -267,7 +267,27 @@ log "Offline TTS test"
267 # test waves are saved in ./tts 267 # test waves are saved in ./tts
268 mkdir ./tts 268 mkdir ./tts
269 269
270 -log "vits-ljs test" 270 +log "matcha-ljspeech-en test"
  271 +
  272 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  273 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  274 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  275 +
  276 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  277 +
  278 +python3 ./python-api-examples/offline-tts.py \
  279 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  280 + --matcha-vocoder=./hifigan_v2.onnx \
  281 + --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  282 + --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  283 + --output-filename=./tts/test-matcha-ljspeech-en.wav \
  284 + --num-threads=2 \
  285 + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
  286 +
  287 +rm hifigan_v2.onnx
  288 +rm -rf matcha-icefall-en_US-ljspeech
  289 +
  290 +log "matcha-baker-zh test"
271 291
272 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 292 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
273 tar xvf matcha-icefall-zh-baker.tar.bz2 293 tar xvf matcha-icefall-zh-baker.tar.bz2
@@ -282,12 +302,13 @@ python3 ./python-api-examples/offline-tts.py \ @@ -282,12 +302,13 @@ python3 ./python-api-examples/offline-tts.py \
282 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 302 --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
283 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ 303 --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
284 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ 304 --matcha-dict-dir=./matcha-icefall-zh-baker/dict \
285 - --output-filename=./tts/test-matcha.wav \ 305 + --output-filename=./tts/test-matcha-baker-zh.wav \
286 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" 306 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
287 307
288 rm -rf matcha-icefall-zh-baker 308 rm -rf matcha-icefall-zh-baker
289 rm hifigan_v2.onnx 309 rm hifigan_v2.onnx
290 310
  311 +log "vits-ljs test"
291 312
292 curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx 313 curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/vits-ljs.onnx
293 curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt 314 curl -LS -O https://huggingface.co/csukuangfj/vits-ljs/resolve/main/lexicon.txt
@@ -11,7 +11,7 @@ while the model is still generating. @@ -11,7 +11,7 @@ while the model is still generating.
11 11
12 Usage: 12 Usage:
13 13
14 -Example (1/4) 14 +Example (1/5)
15 15
16 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 16 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
17 tar xf vits-piper-en_US-amy-low.tar.bz2 17 tar xf vits-piper-en_US-amy-low.tar.bz2
@@ -23,7 +23,7 @@ python3 ./python-api-examples/offline-tts-play.py \ @@ -23,7 +23,7 @@ python3 ./python-api-examples/offline-tts-play.py \
23 --output-filename=./generated.wav \ 23 --output-filename=./generated.wav \
24 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." 24 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
25 25
26 -Example (2/4) 26 +Example (2/5)
27 27
28 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 28 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
29 tar xvf vits-zh-aishell3.tar.bz2 29 tar xvf vits-zh-aishell3.tar.bz2
@@ -37,7 +37,7 @@ python3 ./python-api-examples/offline-tts-play.py \ @@ -37,7 +37,7 @@ python3 ./python-api-examples/offline-tts-play.py \
37 --output-filename=./liubei-21.wav \ 37 --output-filename=./liubei-21.wav \
38 "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334" 38 "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334"
39 39
40 -Example (3/4) 40 +Example (3/5)
41 41
42 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2 42 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
43 tar xvf sherpa-onnx-vits-zh-ll.tar.bz2 43 tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
@@ -53,7 +53,7 @@ python3 ./python-api-examples/offline-tts-play.py \ @@ -53,7 +53,7 @@ python3 ./python-api-examples/offline-tts-play.py \
53 --output-filename=./test-2.wav \ 53 --output-filename=./test-2.wav \
54 "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。" 54 "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。"
55 55
56 -Example (4/4) 56 +Example (4/5)
57 57
58 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 58 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
59 tar xvf matcha-icefall-zh-baker.tar.bz2 59 tar xvf matcha-icefall-zh-baker.tar.bz2
@@ -71,6 +71,23 @@ python3 ./python-api-examples/offline-tts-play.py \ @@ -71,6 +71,23 @@ python3 ./python-api-examples/offline-tts-play.py \
71 --output-filename=./test-matcha.wav \ 71 --output-filename=./test-matcha.wav \
72 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" 72 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
73 73
  74 +Example (5/5)
  75 +
  76 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  77 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  78 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  79 +
  80 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  81 +
  82 +python3 ./python-api-examples/offline-tts-play.py \
  83 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  84 + --matcha-vocoder=./hifigan_v2.onnx \
  85 + --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  86 + --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  87 + --output-filename=./test-matcha-ljspeech-en.wav \
  88 + --num-threads=2 \
  89 + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
  90 +
74 91
75 You can find more models at 92 You can find more models at
76 https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models 93 https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
@@ -12,7 +12,7 @@ generated audio. @@ -12,7 +12,7 @@ generated audio.
12 12
13 Usage: 13 Usage:
14 14
15 -Example (1/4) 15 +Example (1/5)
16 16
17 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 17 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
18 tar xf vits-piper-en_US-amy-low.tar.bz2 18 tar xf vits-piper-en_US-amy-low.tar.bz2
@@ -24,7 +24,7 @@ python3 ./python-api-examples/offline-tts.py \ @@ -24,7 +24,7 @@ python3 ./python-api-examples/offline-tts.py \
24 --output-filename=./generated.wav \ 24 --output-filename=./generated.wav \
25 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." 25 "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
26 26
27 -Example (2/4) 27 +Example (2/5)
28 28
29 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 29 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
30 tar xvf vits-icefall-zh-aishell3.tar.bz2 30 tar xvf vits-icefall-zh-aishell3.tar.bz2
@@ -38,7 +38,7 @@ python3 ./python-api-examples/offline-tts.py \ @@ -38,7 +38,7 @@ python3 ./python-api-examples/offline-tts.py \
38 --output-filename=./liubei-21.wav \ 38 --output-filename=./liubei-21.wav \
39 "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334" 39 "勿以恶小而为之,勿以善小而不为。惟贤惟德,能服于人。122334"
40 40
41 -Example (3/4) 41 +Example (3/5)
42 42
43 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2 43 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/sherpa-onnx-vits-zh-ll.tar.bz2
44 tar xvf sherpa-onnx-vits-zh-ll.tar.bz2 44 tar xvf sherpa-onnx-vits-zh-ll.tar.bz2
@@ -54,7 +54,7 @@ python3 ./python-api-examples/offline-tts.py \ @@ -54,7 +54,7 @@ python3 ./python-api-examples/offline-tts.py \
54 --output-filename=./test-2.wav \ 54 --output-filename=./test-2.wav \
55 "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。" 55 "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。2024年5月11号,拨打110或者18920240511。123456块钱。"
56 56
57 -Example (4/4) 57 +Example (4/5)
58 58
59 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 59 curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
60 tar xvf matcha-icefall-zh-baker.tar.bz2 60 tar xvf matcha-icefall-zh-baker.tar.bz2
@@ -72,6 +72,23 @@ python3 ./python-api-examples/offline-tts.py \ @@ -72,6 +72,23 @@ python3 ./python-api-examples/offline-tts.py \
72 --output-filename=./test-matcha.wav \ 72 --output-filename=./test-matcha.wav \
73 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" 73 "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
74 74
  75 +Example (5/5)
  76 +
  77 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  78 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  79 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  80 +
  81 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  82 +
  83 +python3 ./python-api-examples/offline-tts.py \
  84 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  85 + --matcha-vocoder=./hifigan_v2.onnx \
  86 + --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  87 + --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  88 + --output-filename=./test-matcha-ljspeech-en.wav \
  89 + --num-threads=2 \
  90 + "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar."
  91 +
75 You can find more models at 92 You can find more models at
76 https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models 93 https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
77 94
@@ -49,19 +49,21 @@ @@ -49,19 +49,21 @@
49 } while (0) 49 } while (0)
50 #endif 50 #endif
51 51
  52 +#define SHERPA_ONNX_EXIT(code) exit(code)
  53 +
52 // Read an integer 54 // Read an integer
53 #define SHERPA_ONNX_READ_META_DATA(dst, src_key) \ 55 #define SHERPA_ONNX_READ_META_DATA(dst, src_key) \
54 do { \ 56 do { \
55 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \ 57 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
56 if (value.empty()) { \ 58 if (value.empty()) { \
57 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \ 59 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \
58 - exit(-1); \ 60 + SHERPA_ONNX_EXIT(-1); \
59 } \ 61 } \
60 \ 62 \
61 dst = atoi(value.c_str()); \ 63 dst = atoi(value.c_str()); \
62 if (dst < 0) { \ 64 if (dst < 0) { \
63 SHERPA_ONNX_LOGE("Invalid value %d for '%s'", dst, src_key); \ 65 SHERPA_ONNX_LOGE("Invalid value %d for '%s'", dst, src_key); \
64 - exit(-1); \ 66 + SHERPA_ONNX_EXIT(-1); \
65 } \ 67 } \
66 } while (0) 68 } while (0)
67 69
@@ -74,7 +76,7 @@ @@ -74,7 +76,7 @@
74 dst = atoi(value.c_str()); \ 76 dst = atoi(value.c_str()); \
75 if (dst < 0) { \ 77 if (dst < 0) { \
76 SHERPA_ONNX_LOGE("Invalid value %d for '%s'", dst, src_key); \ 78 SHERPA_ONNX_LOGE("Invalid value %d for '%s'", dst, src_key); \
77 - exit(-1); \ 79 + SHERPA_ONNX_EXIT(-1); \
78 } \ 80 } \
79 } \ 81 } \
80 } while (0) 82 } while (0)
@@ -85,13 +87,13 @@ @@ -85,13 +87,13 @@
85 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \ 87 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
86 if (value.empty()) { \ 88 if (value.empty()) { \
87 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \ 89 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \
88 - exit(-1); \ 90 + SHERPA_ONNX_EXIT(-1); \
89 } \ 91 } \
90 \ 92 \
91 bool ret = SplitStringToIntegers(value.c_str(), ",", true, &dst); \ 93 bool ret = SplitStringToIntegers(value.c_str(), ",", true, &dst); \
92 if (!ret) { \ 94 if (!ret) { \
93 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'", value.c_str(), src_key); \ 95 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'", value.c_str(), src_key); \
94 - exit(-1); \ 96 + SHERPA_ONNX_EXIT(-1); \
95 } \ 97 } \
96 } while (0) 98 } while (0)
97 99
@@ -101,13 +103,13 @@ @@ -101,13 +103,13 @@
101 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \ 103 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
102 if (value.empty()) { \ 104 if (value.empty()) { \
103 SHERPA_ONNX_LOGE("%s does not exist in the metadata", src_key); \ 105 SHERPA_ONNX_LOGE("%s does not exist in the metadata", src_key); \
104 - exit(-1); \ 106 + SHERPA_ONNX_EXIT(-1); \
105 } \ 107 } \
106 \ 108 \
107 bool ret = SplitStringToFloats(value.c_str(), ",", true, &dst); \ 109 bool ret = SplitStringToFloats(value.c_str(), ",", true, &dst); \
108 if (!ret) { \ 110 if (!ret) { \
109 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'", value.c_str(), src_key); \ 111 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'", value.c_str(), src_key); \
110 - exit(-1); \ 112 + SHERPA_ONNX_EXIT(-1); \
111 } \ 113 } \
112 } while (0) 114 } while (0)
113 115
@@ -117,14 +119,14 @@ @@ -117,14 +119,14 @@
117 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \ 119 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
118 if (value.empty()) { \ 120 if (value.empty()) { \
119 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \ 121 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \
120 - exit(-1); \ 122 + SHERPA_ONNX_EXIT(-1); \
121 } \ 123 } \
122 SplitStringToVector(value.c_str(), ",", false, &dst); \ 124 SplitStringToVector(value.c_str(), ",", false, &dst); \
123 \ 125 \
124 if (dst.empty()) { \ 126 if (dst.empty()) { \
125 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'. Empty vector!", \ 127 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'. Empty vector!", \
126 value.c_str(), src_key); \ 128 value.c_str(), src_key); \
127 - exit(-1); \ 129 + SHERPA_ONNX_EXIT(-1); \
128 } \ 130 } \
129 } while (0) 131 } while (0)
130 132
@@ -134,14 +136,14 @@ @@ -134,14 +136,14 @@
134 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \ 136 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
135 if (value.empty()) { \ 137 if (value.empty()) { \
136 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \ 138 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \
137 - exit(-1); \ 139 + SHERPA_ONNX_EXIT(-1); \
138 } \ 140 } \
139 SplitStringToVector(value.c_str(), sep, false, &dst); \ 141 SplitStringToVector(value.c_str(), sep, false, &dst); \
140 \ 142 \
141 if (dst.empty()) { \ 143 if (dst.empty()) { \
142 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'. Empty vector!", \ 144 SHERPA_ONNX_LOGE("Invalid value '%s' for '%s'. Empty vector!", \
143 value.c_str(), src_key); \ 145 value.c_str(), src_key); \
144 - exit(-1); \ 146 + SHERPA_ONNX_EXIT(-1); \
145 } \ 147 } \
146 } while (0) 148 } while (0)
147 149
@@ -151,13 +153,13 @@ @@ -151,13 +153,13 @@
151 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \ 153 auto value = LookupCustomModelMetaData(meta_data, src_key, allocator); \
152 if (value.empty()) { \ 154 if (value.empty()) { \
153 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \ 155 SHERPA_ONNX_LOGE("'%s' does not exist in the metadata", src_key); \
154 - exit(-1); \ 156 + SHERPA_ONNX_EXIT(-1); \
155 } \ 157 } \
156 \ 158 \
157 dst = std::move(value); \ 159 dst = std::move(value); \
158 if (dst.empty()) { \ 160 if (dst.empty()) { \
159 SHERPA_ONNX_LOGE("Invalid value for '%s'\n", src_key); \ 161 SHERPA_ONNX_LOGE("Invalid value for '%s'\n", src_key); \
160 - exit(-1); \ 162 + SHERPA_ONNX_EXIT(-1); \
161 } \ 163 } \
162 } while (0) 164 } while (0)
163 165
@@ -178,11 +180,9 @@ @@ -178,11 +180,9 @@
178 dst = std::move(value); \ 180 dst = std::move(value); \
179 if (dst.empty()) { \ 181 if (dst.empty()) { \
180 SHERPA_ONNX_LOGE("Invalid value for '%s'\n", src_key); \ 182 SHERPA_ONNX_LOGE("Invalid value for '%s'\n", src_key); \
181 - exit(-1); \ 183 + SHERPA_ONNX_EXIT(-1); \
182 } \ 184 } \
183 } \ 185 } \
184 } while (0) 186 } while (0)
185 187
186 -#define SHERPA_ONNX_EXIT(code) exit(code)  
187 -  
188 #endif // SHERPA_ONNX_CSRC_MACROS_H_ 188 #endif // SHERPA_ONNX_CSRC_MACROS_H_
@@ -321,12 +321,45 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { @@ -321,12 +321,45 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
321 321
322 private: 322 private:
323 template <typename Manager> 323 template <typename Manager>
324 - void InitFrontend(Manager *mgr) {} 324 + void InitFrontend(Manager *mgr) {
  325 + // for piper phonemizer
  326 + // we require that you copy espeak_ng_data
  327 + // from assets to disk
  328 + //
  329 + // for jieba
  330 + // we require that you copy tokens.txt, lexicon.txt and dict
  331 + // from assets to disk
  332 + const auto &meta_data = model_->GetMetaData();
  333 +
  334 + if (meta_data.jieba && !meta_data.has_espeak) {
  335 + frontend_ = std::make_unique<JiebaLexicon>(
  336 + config_.model.matcha.lexicon, config_.model.matcha.tokens,
  337 + config_.model.matcha.dict_dir, config_.model.debug);
  338 + } else if (meta_data.has_espeak && !meta_data.jieba) {
  339 + frontend_ = std::make_unique<PiperPhonemizeLexicon>(
  340 + mgr, config_.model.matcha.tokens, config_.model.matcha.data_dir,
  341 + meta_data);
  342 + } else {
  343 + SHERPA_ONNX_LOGE("jieba + espeaker-ng is not supported yet");
  344 + SHERPA_ONNX_EXIT(-1);
  345 + }
  346 + }
325 347
326 void InitFrontend() { 348 void InitFrontend() {
  349 + const auto &meta_data = model_->GetMetaData();
  350 +
  351 + if (meta_data.jieba && !meta_data.has_espeak) {
327 frontend_ = std::make_unique<JiebaLexicon>( 352 frontend_ = std::make_unique<JiebaLexicon>(
328 config_.model.matcha.lexicon, config_.model.matcha.tokens, 353 config_.model.matcha.lexicon, config_.model.matcha.tokens,
329 config_.model.matcha.dict_dir, config_.model.debug); 354 config_.model.matcha.dict_dir, config_.model.debug);
  355 + } else if (meta_data.has_espeak && !meta_data.jieba) {
  356 + frontend_ = std::make_unique<PiperPhonemizeLexicon>(
  357 + config_.model.matcha.tokens, config_.model.matcha.data_dir,
  358 + meta_data);
  359 + } else {
  360 + SHERPA_ONNX_LOGE("jieba + espeaker-ng is not supported yet");
  361 + SHERPA_ONNX_EXIT(-1);
  362 + }
330 } 363 }
331 364
332 GeneratedAudio Process(const std::vector<std::vector<int64_t>> &tokens, 365 GeneratedAudio Process(const std::vector<std::vector<int64_t>> &tokens,
@@ -18,7 +18,7 @@ struct OfflineTtsMatchaModelMetaData { @@ -18,7 +18,7 @@ struct OfflineTtsMatchaModelMetaData {
18 int32_t num_speakers = 0; 18 int32_t num_speakers = 0;
19 int32_t version = 1; 19 int32_t version = 1;
20 int32_t jieba = 0; 20 int32_t jieba = 0;
21 - int32_t espeak = 0; 21 + int32_t has_espeak = 0;
22 int32_t use_eos_bos = 0; 22 int32_t use_eos_bos = 0;
23 int32_t pad_id = 0; 23 int32_t pad_id = 0;
24 }; 24 };
@@ -142,7 +142,7 @@ class OfflineTtsMatchaModel::Impl { @@ -142,7 +142,7 @@ class OfflineTtsMatchaModel::Impl {
142 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1); 142 SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1);
143 SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers"); 143 SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers");
144 SHERPA_ONNX_READ_META_DATA(meta_data_.jieba, "jieba"); 144 SHERPA_ONNX_READ_META_DATA(meta_data_.jieba, "jieba");
145 - SHERPA_ONNX_READ_META_DATA(meta_data_.espeak, "has_espeak"); 145 + SHERPA_ONNX_READ_META_DATA(meta_data_.has_espeak, "has_espeak");
146 SHERPA_ONNX_READ_META_DATA(meta_data_.use_eos_bos, "use_eos_bos"); 146 SHERPA_ONNX_READ_META_DATA(meta_data_.use_eos_bos, "use_eos_bos");
147 SHERPA_ONNX_READ_META_DATA(meta_data_.pad_id, "pad_id"); 147 SHERPA_ONNX_READ_META_DATA(meta_data_.pad_id, "pad_id");
148 } 148 }
@@ -32,6 +32,18 @@ @@ -32,6 +32,18 @@
32 32
33 namespace sherpa_onnx { 33 namespace sherpa_onnx {
34 34
  35 +static void CallPhonemizeEspeak(
  36 + const std::string &text,
  37 + piper::eSpeakPhonemeConfig &config, // NOLINT
  38 + std::vector<std::vector<piper::Phoneme>> *phonemes) {
  39 + static std::mutex espeak_mutex;
  40 +
  41 + std::lock_guard<std::mutex> lock(espeak_mutex);
  42 +
  43 + // keep multi threads from calling into piper::phonemize_eSpeak
  44 + piper::phonemize_eSpeak(text, config, *phonemes);
  45 +}
  46 +
35 static std::unordered_map<char32_t, int32_t> ReadTokens(std::istream &is) { 47 static std::unordered_map<char32_t, int32_t> ReadTokens(std::istream &is) {
36 std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv; 48 std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
37 std::unordered_map<char32_t, int32_t> token2id; 49 std::unordered_map<char32_t, int32_t> token2id;
@@ -87,7 +99,7 @@ static std::unordered_map<char32_t, int32_t> ReadTokens(std::istream &is) { @@ -87,7 +99,7 @@ static std::unordered_map<char32_t, int32_t> ReadTokens(std::istream &is) {
87 99
88 // see the function "phonemes_to_ids" from 100 // see the function "phonemes_to_ids" from
89 // https://github.com/rhasspy/piper/blob/master/notebooks/piper_inference_(ONNX).ipynb 101 // https://github.com/rhasspy/piper/blob/master/notebooks/piper_inference_(ONNX).ipynb
90 -static std::vector<int64_t> PiperPhonemesToIds( 102 +static std::vector<int64_t> PiperPhonemesToIdsVits(
91 const std::unordered_map<char32_t, int32_t> &token2id, 103 const std::unordered_map<char32_t, int32_t> &token2id,
92 const std::vector<piper::Phoneme> &phonemes) { 104 const std::vector<piper::Phoneme> &phonemes) {
93 // see 105 // see
@@ -114,17 +126,46 @@ static std::vector<int64_t> PiperPhonemesToIds( @@ -114,17 +126,46 @@ static std::vector<int64_t> PiperPhonemesToIds(
114 return ans; 126 return ans;
115 } 127 }
116 128
  129 +static std::vector<int64_t> PiperPhonemesToIdsMatcha(
  130 + const std::unordered_map<char32_t, int32_t> &token2id,
  131 + const std::vector<piper::Phoneme> &phonemes, bool use_eos_bos) {
  132 + std::vector<int64_t> ans;
  133 + ans.reserve(phonemes.size());
  134 +
  135 + int32_t bos = token2id.at(U'^');
  136 + int32_t eos = token2id.at(U'$');
  137 +
  138 + if (use_eos_bos) {
  139 + ans.push_back(bos);
  140 + }
  141 +
  142 + for (auto p : phonemes) {
  143 + if (token2id.count(p)) {
  144 + ans.push_back(token2id.at(p));
  145 + } else {
  146 + SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.",
  147 + static_cast<uint32_t>(p));
  148 + }
  149 + }
  150 +
  151 + if (use_eos_bos) {
  152 + ans.push_back(eos);
  153 + }
  154 +
  155 + return ans;
  156 +}
  157 +
117 static std::vector<int64_t> CoquiPhonemesToIds( 158 static std::vector<int64_t> CoquiPhonemesToIds(
118 const std::unordered_map<char32_t, int32_t> &token2id, 159 const std::unordered_map<char32_t, int32_t> &token2id,
119 const std::vector<piper::Phoneme> &phonemes, 160 const std::vector<piper::Phoneme> &phonemes,
120 - const OfflineTtsVitsModelMetaData &meta_data) { 161 + const OfflineTtsVitsModelMetaData &vits_meta_data) {
121 // see 162 // see
122 // https://github.com/coqui-ai/TTS/blob/dev/TTS/tts/utils/text/tokenizer.py#L87 163 // https://github.com/coqui-ai/TTS/blob/dev/TTS/tts/utils/text/tokenizer.py#L87
123 - int32_t use_eos_bos = meta_data.use_eos_bos;  
124 - int32_t bos_id = meta_data.bos_id;  
125 - int32_t eos_id = meta_data.eos_id;  
126 - int32_t blank_id = meta_data.blank_id;  
127 - int32_t add_blank = meta_data.add_blank; 164 + int32_t use_eos_bos = vits_meta_data.use_eos_bos;
  165 + int32_t bos_id = vits_meta_data.bos_id;
  166 + int32_t eos_id = vits_meta_data.eos_id;
  167 + int32_t blank_id = vits_meta_data.blank_id;
  168 + int32_t add_blank = vits_meta_data.add_blank;
128 int32_t comma_id = token2id.at(','); 169 int32_t comma_id = token2id.at(',');
129 170
130 std::vector<int64_t> ans; 171 std::vector<int64_t> ans;
@@ -189,8 +230,37 @@ static void InitEspeak(const std::string &data_dir) { @@ -189,8 +230,37 @@ static void InitEspeak(const std::string &data_dir) {
189 230
190 PiperPhonemizeLexicon::PiperPhonemizeLexicon( 231 PiperPhonemizeLexicon::PiperPhonemizeLexicon(
191 const std::string &tokens, const std::string &data_dir, 232 const std::string &tokens, const std::string &data_dir,
192 - const OfflineTtsVitsModelMetaData &meta_data)  
193 - : meta_data_(meta_data) { 233 + const OfflineTtsVitsModelMetaData &vits_meta_data)
  234 + : vits_meta_data_(vits_meta_data) {
  235 + {
  236 + std::ifstream is(tokens);
  237 + token2id_ = ReadTokens(is);
  238 + }
  239 +
  240 + InitEspeak(data_dir);
  241 +}
  242 +
  243 +template <typename Manager>
  244 +PiperPhonemizeLexicon::PiperPhonemizeLexicon(
  245 + Manager *mgr, const std::string &tokens, const std::string &data_dir,
  246 + const OfflineTtsVitsModelMetaData &vits_meta_data)
  247 + : vits_meta_data_(vits_meta_data) {
  248 + {
  249 + auto buf = ReadFile(mgr, tokens);
  250 + std::istrstream is(buf.data(), buf.size());
  251 + token2id_ = ReadTokens(is);
  252 + }
  253 +
  254 + // We should copy the directory of espeak-ng-data from the asset to
  255 + // some internal or external storage and then pass the directory to
  256 + // data_dir.
  257 + InitEspeak(data_dir);
  258 +}
  259 +
  260 +PiperPhonemizeLexicon::PiperPhonemizeLexicon(
  261 + const std::string &tokens, const std::string &data_dir,
  262 + const OfflineTtsMatchaModelMetaData &matcha_meta_data)
  263 + : matcha_meta_data_(matcha_meta_data), is_matcha_(true) {
194 { 264 {
195 std::ifstream is(tokens); 265 std::ifstream is(tokens);
196 token2id_ = ReadTokens(is); 266 token2id_ = ReadTokens(is);
@@ -202,8 +272,8 @@ PiperPhonemizeLexicon::PiperPhonemizeLexicon( @@ -202,8 +272,8 @@ PiperPhonemizeLexicon::PiperPhonemizeLexicon(
202 template <typename Manager> 272 template <typename Manager>
203 PiperPhonemizeLexicon::PiperPhonemizeLexicon( 273 PiperPhonemizeLexicon::PiperPhonemizeLexicon(
204 Manager *mgr, const std::string &tokens, const std::string &data_dir, 274 Manager *mgr, const std::string &tokens, const std::string &data_dir,
205 - const OfflineTtsVitsModelMetaData &meta_data)  
206 - : meta_data_(meta_data) { 275 + const OfflineTtsMatchaModelMetaData &matcha_meta_data)
  276 + : matcha_meta_data_(matcha_meta_data), is_matcha_(true) {
207 { 277 {
208 auto buf = ReadFile(mgr, tokens); 278 auto buf = ReadFile(mgr, tokens);
209 std::istrstream is(buf.data(), buf.size()); 279 std::istrstream is(buf.data(), buf.size());
@@ -218,6 +288,15 @@ PiperPhonemizeLexicon::PiperPhonemizeLexicon( @@ -218,6 +288,15 @@ PiperPhonemizeLexicon::PiperPhonemizeLexicon(
218 288
219 std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds( 289 std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds(
220 const std::string &text, const std::string &voice /*= ""*/) const { 290 const std::string &text, const std::string &voice /*= ""*/) const {
  291 + if (is_matcha_) {
  292 + return ConvertTextToTokenIdsMatcha(text, voice);
  293 + } else {
  294 + return ConvertTextToTokenIdsVits(text, voice);
  295 + }
  296 +}
  297 +
  298 +std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIdsMatcha(
  299 + const std::string &text, const std::string &voice /*= ""*/) const {
221 piper::eSpeakPhonemeConfig config; 300 piper::eSpeakPhonemeConfig config;
222 301
223 // ./bin/espeak-ng-bin --path ./install/share/espeak-ng-data/ --voices 302 // ./bin/espeak-ng-bin --path ./install/share/espeak-ng-data/ --voices
@@ -226,26 +305,45 @@ std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds( @@ -226,26 +305,45 @@ std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds(
226 305
227 std::vector<std::vector<piper::Phoneme>> phonemes; 306 std::vector<std::vector<piper::Phoneme>> phonemes;
228 307
229 - static std::mutex espeak_mutex;  
230 - {  
231 - std::lock_guard<std::mutex> lock(espeak_mutex); 308 + CallPhonemizeEspeak(text, config, &phonemes);
232 309
233 - // keep multi threads from calling into piper::phonemize_eSpeak  
234 - piper::phonemize_eSpeak(text, config, phonemes); 310 + std::vector<TokenIDs> ans;
  311 +
  312 + std::vector<int64_t> phoneme_ids;
  313 +
  314 + for (const auto &p : phonemes) {
  315 + phoneme_ids =
  316 + PiperPhonemesToIdsMatcha(token2id_, p, matcha_meta_data_.use_eos_bos);
  317 + ans.emplace_back(std::move(phoneme_ids));
235 } 318 }
236 319
  320 + return ans;
  321 +}
  322 +
  323 +std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIdsVits(
  324 + const std::string &text, const std::string &voice /*= ""*/) const {
  325 + piper::eSpeakPhonemeConfig config;
  326 +
  327 + // ./bin/espeak-ng-bin --path ./install/share/espeak-ng-data/ --voices
  328 + // to list available voices
  329 + config.voice = voice; // e.g., voice is en-us
  330 +
  331 + std::vector<std::vector<piper::Phoneme>> phonemes;
  332 +
  333 + CallPhonemizeEspeak(text, config, &phonemes);
  334 +
237 std::vector<TokenIDs> ans; 335 std::vector<TokenIDs> ans;
238 336
239 std::vector<int64_t> phoneme_ids; 337 std::vector<int64_t> phoneme_ids;
240 338
241 - if (meta_data_.is_piper || meta_data_.is_icefall) { 339 + if (vits_meta_data_.is_piper || vits_meta_data_.is_icefall) {
242 for (const auto &p : phonemes) { 340 for (const auto &p : phonemes) {
243 - phoneme_ids = PiperPhonemesToIds(token2id_, p); 341 + phoneme_ids = PiperPhonemesToIdsVits(token2id_, p);
244 ans.emplace_back(std::move(phoneme_ids)); 342 ans.emplace_back(std::move(phoneme_ids));
245 } 343 }
246 - } else if (meta_data_.is_coqui) { 344 + } else if (vits_meta_data_.is_coqui) {
247 for (const auto &p : phonemes) { 345 for (const auto &p : phonemes) {
248 - phoneme_ids = CoquiPhonemesToIds(token2id_, p, meta_data_); 346 + phoneme_ids = CoquiPhonemesToIds(token2id_, p, vits_meta_data_);
249 ans.emplace_back(std::move(phoneme_ids)); 347 ans.emplace_back(std::move(phoneme_ids));
250 } 348 }
251 349
@@ -260,13 +358,18 @@ std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds( @@ -260,13 +358,18 @@ std::vector<TokenIDs> PiperPhonemizeLexicon::ConvertTextToTokenIds(
// Explicit instantiations of the templated PiperPhonemizeLexicon
// constructors, one per platform resource-manager type.
//
// Each manager type needs BOTH the VITS and the Matcha overload: the header
// declares both templated constructors, and an overload that is not
// instantiated here is not emitted for that platform.
#if __ANDROID_API__ >= 9
template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsVitsModelMetaData &vits_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    AAssetManager *mgr, const std::string &tokens, const std::string &data_dir,
    const OfflineTtsMatchaModelMetaData &matcha_meta_data);
#endif

#if __OHOS__
// The VITS instantiation must stay next to the Matcha one; replacing it
// (instead of adding to it) would leave the VITS constructor without an
// instantiation for NativeResourceManager.
template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    NativeResourceManager *mgr, const std::string &tokens,
    const std::string &data_dir,
    const OfflineTtsVitsModelMetaData &vits_meta_data);

template PiperPhonemizeLexicon::PiperPhonemizeLexicon(
    NativeResourceManager *mgr, const std::string &tokens,
    const std::string &data_dir,
    const OfflineTtsMatchaModelMetaData &matcha_meta_data);
#endif
271 374
272 } // namespace sherpa_onnx 375 } // namespace sherpa_onnx
@@ -10,6 +10,7 @@ @@ -10,6 +10,7 @@
10 #include <vector> 10 #include <vector>
11 11
12 #include "sherpa-onnx/csrc/offline-tts-frontend.h" 12 #include "sherpa-onnx/csrc/offline-tts-frontend.h"
  13 +#include "sherpa-onnx/csrc/offline-tts-matcha-model-metadata.h"
13 #include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h" 14 #include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h"
14 15
15 namespace sherpa_onnx { 16 namespace sherpa_onnx {
@@ -17,20 +18,37 @@ namespace sherpa_onnx { @@ -17,20 +18,37 @@ namespace sherpa_onnx {
17 class PiperPhonemizeLexicon : public OfflineTtsFrontend { 18 class PiperPhonemizeLexicon : public OfflineTtsFrontend {
18 public: 19 public:
19 PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir, 20 PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir,
20 - const OfflineTtsVitsModelMetaData &meta_data); 21 + const OfflineTtsVitsModelMetaData &vits_meta_data);
  22 +
  23 + PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir,
  24 + const OfflineTtsMatchaModelMetaData &matcha_meta_data);
21 25
22 template <typename Manager> 26 template <typename Manager>
23 PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens, 27 PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens,
24 const std::string &data_dir, 28 const std::string &data_dir,
25 - const OfflineTtsVitsModelMetaData &meta_data); 29 + const OfflineTtsVitsModelMetaData &vits_meta_data);
  30 +
  31 + template <typename Manager>
  32 + PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens,
  33 + const std::string &data_dir,
  34 + const OfflineTtsMatchaModelMetaData &matcha_meta_data);
26 35
27 std::vector<TokenIDs> ConvertTextToTokenIds( 36 std::vector<TokenIDs> ConvertTextToTokenIds(
28 const std::string &text, const std::string &voice = "") const override; 37 const std::string &text, const std::string &voice = "") const override;
29 38
30 private: 39 private:
  40 + std::vector<TokenIDs> ConvertTextToTokenIdsVits(
  41 + const std::string &text, const std::string &voice = "") const;
  42 +
  43 + std::vector<TokenIDs> ConvertTextToTokenIdsMatcha(
  44 + const std::string &text, const std::string &voice = "") const;
  45 +
  46 + private:
31 // map unicode codepoint to an integer ID 47 // map unicode codepoint to an integer ID
32 std::unordered_map<char32_t, int32_t> token2id_; 48 std::unordered_map<char32_t, int32_t> token2id_;
33 - OfflineTtsVitsModelMetaData meta_data_; 49 + OfflineTtsVitsModelMetaData vits_meta_data_;
  50 + OfflineTtsMatchaModelMetaData matcha_meta_data_;
  51 + bool is_matcha_ = false;
34 }; 52 };
35 53
36 } // namespace sherpa_onnx 54 } // namespace sherpa_onnx