Fangjun Kuang
Committed by GitHub

Add C# and JavaScript (wasm) API for MatchaTTS models (#1682)
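For orientation, here is a minimal C# usage sketch of the new MatchaTTS support (assumptions noted): the fields under `config.Model.Matcha` are the ones introduced by this change (see the `OfflineTtsMatchaModelConfig` struct and the demo updates below), while the `OfflineTts`, `Generate`, and `SaveToWaveFile` calls and the `RuleFsts`/`MaxNumSentences` fields follow the existing dotnet examples and are assumed here rather than shown in this diff.

```csharp
// Minimal sketch (not part of the patch): drive the new MatchaTTS options from C#.
// Model paths match the downloads performed by run-matcha-zh.sh below.
using SherpaOnnx;

class MatchaTtsSketch
{
    static void Main()
    {
        var config = new OfflineTtsConfig();

        // Fields added by this change: acoustic model + vocoder pair for Matcha.
        config.Model.Matcha.AcousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx";
        config.Model.Matcha.Vocoder = "./hifigan_v2.onnx";
        config.Model.Matcha.Lexicon = "./matcha-icefall-zh-baker/lexicon.txt";
        config.Model.Matcha.Tokens = "./matcha-icefall-zh-baker/tokens.txt";
        config.Model.Matcha.DictDir = "./matcha-icefall-zh-baker/dict";

        config.Model.NumThreads = 1;
        config.Model.Provider = "cpu";

        // RuleFsts/MaxNumSentences and the calls below mirror the existing demos
        // (assumed API, not introduced by this PR).
        config.RuleFsts = "./matcha-icefall-zh-baker/phone.fst," +
                          "./matcha-icefall-zh-baker/date.fst," +
                          "./matcha-icefall-zh-baker/number.fst";
        config.MaxNumSentences = 1;

        var tts = new OfflineTts(config);
        var audio = tts.Generate("2024年12月31号,拨打110或者18920240511。", 1.0f, 0);
        audio.SaveToWaveFile("./matcha-zh.wav");
    }
}
```

The JavaScript (wasm) side exposes the same fields through `offlineTtsMatchaModelConfig`, as shown in the new node tests further down in this diff.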

@@ -2,7 +2,27 @@ @@ -2,7 +2,27 @@
2 2
3 cd dotnet-examples/ 3 cd dotnet-examples/
4 4
5 -cd ./offline-speaker-diarization 5 +cd ./offline-tts
  6 +./run-matcha-zh.sh
  7 +ls -lh *.wav
  8 +./run-matcha-en.sh
  9 +ls -lh *.wav
  10 +./run-aishell3.sh
  11 +ls -lh *.wav
  12 +./run-piper.sh
  13 +ls -lh *.wav
  14 +./run-hf-fanchen.sh
  15 +ls -lh *.wav
  16 +ls -lh
  17 +
  18 +pushd ../..
  19 +
  20 +mkdir tts
  21 +
  22 +cp dotnet-examples/offline-tts/*.wav ./tts
  23 +popd
  24 +
  25 +cd ../offline-speaker-diarization
6 ./run.sh 26 ./run.sh
7 rm -rfv *.onnx 27 rm -rfv *.onnx
8 rm -fv *.wav 28 rm -fv *.wav
@@ -76,14 +96,4 @@ cd ../spoken-language-identification @@ -76,14 +96,4 @@ cd ../spoken-language-identification
76 ./run.sh 96 ./run.sh
77 rm -rf sherpa-onnx-* 97 rm -rf sherpa-onnx-*
78 98
79 -cd ../offline-tts  
80 -./run-aishell3.sh  
81 -./run-piper.sh  
82 -./run-hf-fanchen.sh  
83 -ls -lh  
84 99
85 -cd ../..  
86 -  
87 -mkdir tts  
88 -  
89 -cp dotnet-examples/offline-tts/*.wav ./tts  
@@ -9,6 +9,48 @@ git status @@ -9,6 +9,48 @@ git status
9 ls -lh 9 ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
  12 +# offline tts
  13 +#
  14 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  15 +tar xvf matcha-icefall-zh-baker.tar.bz2
  16 +rm matcha-icefall-zh-baker.tar.bz2
  17 +
  18 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  19 +
  20 +node ./test-offline-tts-matcha-zh.js
  21 +
  22 +rm -rf matcha-icefall-zh-baker
  23 +rm hifigan_v2.onnx
  24 +
  25 +echo "---"
  26 +
  27 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  28 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  29 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  30 +
  31 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  32 +
  33 +node ./test-offline-tts-matcha-en.js
  34 +
  35 +rm -rf matcha-icefall-en_US-ljspeech
  36 +rm hifigan_v2.onnx
  37 +
  38 +echo "---"
  39 +
  40 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  41 +tar xf vits-piper-en_US-amy-low.tar.bz2
  42 +node ./test-offline-tts-vits-en.js
  43 +rm -rf vits-piper-en_US-amy-low*
  44 +
  45 +echo "---"
  46 +
  47 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  48 +tar xvf vits-icefall-zh-aishell3.tar.bz2
  49 +node ./test-offline-tts-vits-zh.js
  50 +rm -rf vits-icefall-zh-aishell3*
  51 +
  52 +ls -lh *.wav
  53 +
12 echo '-----speaker diarization----------' 54 echo '-----speaker diarization----------'
13 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 55 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
14 tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 56 tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
@@ -147,15 +189,3 @@ tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 @@ -147,15 +189,3 @@ tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
147 rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 189 rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
148 node ./test-online-zipformer2-ctc-hlg.js 190 node ./test-online-zipformer2-ctc-hlg.js
149 rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 191 rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
150 -  
151 -# offline tts  
152 -  
153 -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2  
154 -tar xf vits-piper-en_US-amy-low.tar.bz2  
155 -node ./test-offline-tts-en.js  
156 -rm -rf vits-piper-en_US-amy-low*  
157 -  
158 -curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2  
159 -tar xvf vits-icefall-zh-aishell3.tar.bz2  
160 -node ./test-offline-tts-zh.js  
161 -rm -rf vits-icefall-zh-aishell3*  
@@ -92,6 +92,50 @@ jobs: @@ -92,6 +92,50 @@ jobs:
92 python-version: ["3.8"] 92 python-version: ["3.8"]
93 93
94 steps: 94 steps:
  95 + - name: Check space
  96 + shell: bash
  97 + run: |
  98 + df -h
  99 +
  100 + - name: Free space
  101 + shell: bash
  102 + run: |
  103 + df -h
  104 + rm -rf /opt/hostedtoolcache
  105 + df -h
  106 +
  107 + - name: Free more space
  108 + shell: bash
  109 + run: |
  110 + # https://github.com/orgs/community/discussions/25678
  111 + cd /opt
  112 + find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
  113 +
  114 + sudo rm -rf /usr/share/dotnet
  115 + sudo rm -rf "/usr/local/share/boost"
  116 + sudo rm -rf "$AGENT_TOOLSDIRECTORY"
  117 +
  118 + - name: Free Disk Space (Ubuntu)
  119 + uses: jlumbroso/free-disk-space@main
  120 + with:
  121 + # this might remove tools that are actually needed,
  122 + # if set to "true" but frees about 6 GB
  123 + tool-cache: false
  124 +
  125 + # all of these default to true, but feel free to set to
  126 + # "false" if necessary for your workflow
  127 + android: true
  128 + dotnet: false
  129 + haskell: true
  130 + large-packages: true
  131 + docker-images: false
  132 + swap-storage: true
  133 +
  134 + - name: Check space
  135 + shell: bash
  136 + run: |
  137 + df -h
  138 +
95 - uses: actions/checkout@v4 139 - uses: actions/checkout@v4
96 with: 140 with:
97 fetch-depth: 0 141 fetch-depth: 0
@@ -21,48 +21,56 @@ class OfflineTtsPlayDemo @@ -21,48 +21,56 @@ class OfflineTtsPlayDemo
21 { 21 {
22 class Options 22 class Options
23 { 23 {
24 -  
25 [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")] 24 [Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
26 - public string? RuleFsts { get; set; } 25 + public string RuleFsts { get; set; } = string.Empty;
  26 +
  27 + [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
  28 + public string RuleFars { get; set; } = string.Empty;
27 29
28 - [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]  
29 - public string? DictDir { get; set; } 30 + [Option("dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
  31 + public string DictDir { get; set; } = string.Empty;
30 32
31 - [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]  
32 - public string? DataDir { get; set; } 33 + [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
  34 + public string DataDir { get; set; } = string.Empty;
33 35
34 - [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]  
35 - public float LengthScale { get; set; } 36 + [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
  37 + public float LengthScale { get; set; } = 1;
36 38
37 - [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")]  
38 - public float NoiseScale { get; set; } 39 + [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
  40 + public float NoiseScale { get; set; } = 0.667F;
39 41
40 - [Option("vits-noise-scale-w", Required = false, Default = 0.8f, HelpText = "noise_scale_w for VITS models")]  
41 - public float NoiseScaleW { get; set; } 42 + [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
  43 + public float NoiseScaleW { get; set; } = 0.8F;
42 44
43 - [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]  
44 - public string? Lexicon { get; set; } 45 + [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
  46 + public string Lexicon { get; set; } = string.Empty;
45 47
46 - [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")]  
47 - public string? Tokens { get; set; } 48 + [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
  49 + public string Tokens { get; set; } = string.Empty;
48 50
49 [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] 51 [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
50 - public int MaxNumSentences { get; set; } 52 + public int MaxNumSentences { get; set; } = 1;
51 53
52 [Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")] 54 [Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
53 - public int Debug { get; set; } 55 + public int Debug { get; set; } = 0;
  56 +
  57 + [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
  58 + public string Model { get; set; } = string.Empty;
54 59
55 - [Option("vits-model", Required = true, HelpText = "Path to VITS model")]  
56 - public string? Model { get; set; } 60 + [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
  61 + public string AcousticModel { get; set; } = "";
  62 +
  63 + [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
  64 + public string Vocoder { get; set; } = "";
57 65
58 [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] 66 [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
59 - public int SpeakerId { get; set; } 67 + public int SpeakerId { get; set; } = 0;
60 68
61 [Option("text", Required = true, HelpText = "Text to synthesize")] 69 [Option("text", Required = true, HelpText = "Text to synthesize")]
62 - public string? Text { get; set; } 70 + public string Text { get; set; } = string.Empty;
63 71
64 [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")] 72 [Option("output-filename", Required = true, Default = "./generated.wav", HelpText = "Path to save the generated audio")]
65 - public string? OutputFilename { get; set; } 73 + public string OutputFilename { get; set; } = "./generated.wav";
66 } 74 }
67 75
68 static void Main(string[] args) 76 static void Main(string[] args)
@@ -78,6 +86,42 @@ class OfflineTtsPlayDemo @@ -78,6 +86,42 @@ class OfflineTtsPlayDemo
78 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) 86 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
79 { 87 {
80 string usage = @" 88 string usage = @"
  89 +# matcha-icefall-zh-baker
  90 +
  91 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  92 +tar xvf matcha-icefall-zh-baker.tar.bz2
  93 +rm matcha-icefall-zh-baker.tar.bz2
  94 +
  95 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  96 +
  97 +dotnet run \
  98 + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  99 + --matcha-vocoder=./hifigan_v2.onnx \
  100 + --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  101 + --tokens=./matcha-icefall-zh-baker/tokens.txt \
  102 + --dict-dir=./matcha-icefall-zh-baker/dict \
  103 + --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  104 + --debug=1 \
  105 + --output-filename=./matcha-zh.wav \
  106 +  --text='某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。'
  107 +
  108 +# matcha-icefall-en_US-ljspeech
  109 +
  110 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  111 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  112 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  113 +
  114 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  115 +
  116 +dotnet run \
  117 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  118 + --matcha-vocoder=./hifigan_v2.onnx \
  119 +  --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  120 + --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  121 + --debug=1 \
  122 +  --output-filename=./matcha-en.wav \
  123 + --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
  124 +
81 # vits-aishell3 125 # vits-aishell3
82 126
83 wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2 127 wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
@@ -85,8 +129,8 @@ tar xf vits-zh-aishell3.tar.bz2 @@ -85,8 +129,8 @@ tar xf vits-zh-aishell3.tar.bz2
85 129
86 dotnet run \ 130 dotnet run \
87 --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \ 131 --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
88 - --vits-tokens=./vits-zh-aishell3/tokens.txt \  
89 - --vits-lexicon=./vits-zh-aishell3/lexicon.txt \ 132 + --tokens=./vits-zh-aishell3/tokens.txt \
  133 + --lexicon=./vits-zh-aishell3/lexicon.txt \
90 --tts-rule-fsts=./vits-zh-aishell3/rule.fst \ 134 --tts-rule-fsts=./vits-zh-aishell3/rule.fst \
91 --sid=66 \ 135 --sid=66 \
92 --debug=1 \ 136 --debug=1 \
@@ -100,8 +144,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2 @@ -100,8 +144,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2
100 144
101 dotnet run \ 145 dotnet run \
102 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \ 146 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
103 - --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \  
104 - --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \ 147 +  --tokens=./vits-piper-en_US-amy-low/tokens.txt \
  148 + --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
105 --debug=1 \ 149 --debug=1 \
106 --output-filename=./amy.wav \ 150 --output-filename=./amy.wav \
107 --text='This is a text to speech application in dotnet with Next Generation Kaldi' 151 --text='This is a text to speech application in dotnet with Next Generation Kaldi'
@@ -124,6 +168,7 @@ to download more models. @@ -124,6 +168,7 @@ to download more models.
124 private static void Run(Options options) 168 private static void Run(Options options)
125 { 169 {
126 var config = new OfflineTtsConfig(); 170 var config = new OfflineTtsConfig();
  171 +
127 config.Model.Vits.Model = options.Model; 172 config.Model.Vits.Model = options.Model;
128 config.Model.Vits.Lexicon = options.Lexicon; 173 config.Model.Vits.Lexicon = options.Lexicon;
129 config.Model.Vits.Tokens = options.Tokens; 174 config.Model.Vits.Tokens = options.Tokens;
@@ -132,6 +177,16 @@ to download more models. @@ -132,6 +177,16 @@ to download more models.
132 config.Model.Vits.NoiseScale = options.NoiseScale; 177 config.Model.Vits.NoiseScale = options.NoiseScale;
133 config.Model.Vits.NoiseScaleW = options.NoiseScaleW; 178 config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
134 config.Model.Vits.LengthScale = options.LengthScale; 179 config.Model.Vits.LengthScale = options.LengthScale;
  180 +
  181 + config.Model.Matcha.AcousticModel = options.AcousticModel;
  182 + config.Model.Matcha.Vocoder = options.Vocoder;
  183 + config.Model.Matcha.Lexicon = options.Lexicon;
  184 + config.Model.Matcha.Tokens = options.Tokens;
  185 + config.Model.Matcha.DataDir = options.DataDir;
  186 + config.Model.Matcha.DictDir = options.DictDir;
  187 + config.Model.Matcha.NoiseScale = options.NoiseScale;
  188 + config.Model.Matcha.LengthScale = options.LengthScale;
  189 +
135 config.Model.NumThreads = 1; 190 config.Model.NumThreads = 1;
136 config.Model.Debug = options.Debug; 191 config.Model.Debug = options.Debug;
137 config.Model.Provider = "cpu"; 192 config.Model.Provider = "cpu";
@@ -8,8 +8,8 @@ fi @@ -8,8 +8,8 @@ fi
8 8
9 dotnet run \ 9 dotnet run \
10 --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \ 10 --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
11 - --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \  
12 - --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \ 11 + --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
  12 + --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
13 --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \ 13 --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
14 --vits-dict-dir=./vits-zh-hf-fanchen-C/dict \ 14 --vits-dict-dir=./vits-zh-hf-fanchen-C/dict \
15 --sid=100 \ 15 --sid=100 \
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +
  5 +# please visit
  6 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
  7 +# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
  8 +# to download more models
  9 +if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  10 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  11 + tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  12 + rm matcha-icefall-en_US-ljspeech.tar.bz2
  13 +fi
  14 +
  15 +if [ ! -f ./hifigan_v2.onnx ]; then
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  17 +fi
  18 +
  19 +dotnet run \
  20 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  21 + --matcha-vocoder=./hifigan_v2.onnx \
  22 + --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  23 + --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  24 + --debug=1 \
  25 + --output-filename=./matcha-en.wav \
  26 + --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +# please visit
  5 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
  6 +# to download more models
  7 +if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  8 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  9 + tar xvf matcha-icefall-zh-baker.tar.bz2
  10 + rm matcha-icefall-zh-baker.tar.bz2
  11 +fi
  12 +
  13 +if [ ! -f ./hifigan_v2.onnx ]; then
  14 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  15 +fi
  16 +
  17 +
  18 +dotnet run \
  19 + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  20 + --matcha-vocoder=./hifigan_v2.onnx \
  21 + --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  22 + --tokens=./matcha-icefall-zh-baker/tokens.txt \
  23 + --dict-dir=./matcha-icefall-zh-baker/dict \
  24 + --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  25 + --debug=1 \
  26 + --output-filename=./matcha-zh.wav \
  27 + --text="某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
@@ -9,8 +9,8 @@ fi @@ -9,8 +9,8 @@ fi
9 9
10 dotnet run \ 10 dotnet run \
11 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \ 11 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
12 - --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \  
13 - --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \ 12 + --tokens=./vits-piper-en_US-amy-low/tokens.txt \
  13 + --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
14 --debug=1 \ 14 --debug=1 \
15 --output-filename=./amy.wav \ 15 --output-filename=./amy.wav \
16 --text="This is a text to speech application in dotnet with Next Generation Kaldi" 16 --text="This is a text to speech application in dotnet with Next Generation Kaldi"
@@ -20,25 +20,25 @@ class OfflineTtsDemo @@ -20,25 +20,25 @@ class OfflineTtsDemo
20 [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")] 20 [Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
21 public string RuleFars { get; set; } = string.Empty; 21 public string RuleFars { get; set; } = string.Empty;
22 22
23 - [Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")] 23 + [Option("dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
24 public string DictDir { get; set; } = string.Empty; 24 public string DictDir { get; set; } = string.Empty;
25 25
26 - [Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")] 26 + [Option("data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
27 public string DataDir { get; set; } = string.Empty; 27 public string DataDir { get; set; } = string.Empty;
28 28
29 - [Option("vits-length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")] 29 + [Option("length-scale", Required = false, Default = 1, HelpText = "speech speed. Larger->Slower; Smaller->faster")]
30 public float LengthScale { get; set; } = 1; 30 public float LengthScale { get; set; } = 1;
31 31
32 - [Option("vits-noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS models")] 32 + [Option("noise-scale", Required = false, Default = 0.667f, HelpText = "noise_scale for VITS or Matcha models")]
33 public float NoiseScale { get; set; } = 0.667F; 33 public float NoiseScale { get; set; } = 0.667F;
34 34
35 [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")] 35 [Option("vits-noise-scale-w", Required = false, Default = 0.8F, HelpText = "noise_scale_w for VITS models")]
36 public float NoiseScaleW { get; set; } = 0.8F; 36 public float NoiseScaleW { get; set; } = 0.8F;
37 37
38 - [Option("vits-lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")] 38 + [Option("lexicon", Required = false, Default = "", HelpText = "Path to lexicon.txt")]
39 public string Lexicon { get; set; } = string.Empty; 39 public string Lexicon { get; set; } = string.Empty;
40 40
41 - [Option("vits-tokens", Required = false, Default = "", HelpText = "Path to tokens.txt")] 41 + [Option("tokens", Required = true, Default = "", HelpText = "Path to tokens.txt")]
42 public string Tokens { get; set; } = string.Empty; 42 public string Tokens { get; set; } = string.Empty;
43 43
44 [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")] 44 [Option("tts-max-num-sentences", Required = false, Default = 1, HelpText = "Maximum number of sentences that we process at a time.")]
@@ -47,9 +47,15 @@ class OfflineTtsDemo @@ -47,9 +47,15 @@ class OfflineTtsDemo
47 [Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")] 47 [Option(Required = false, Default = 0, HelpText = "1 to show debug messages.")]
48 public int Debug { get; set; } = 0; 48 public int Debug { get; set; } = 0;
49 49
50 - [Option("vits-model", Required = true, HelpText = "Path to VITS model")] 50 + [Option("vits-model", Required = false, HelpText = "Path to VITS model")]
51 public string Model { get; set; } = string.Empty; 51 public string Model { get; set; } = string.Empty;
52 52
  53 + [Option("matcha-acoustic-model", Required = false, HelpText = "Path to the acoustic model of Matcha")]
  54 + public string AcousticModel { get; set; } = "";
  55 +
  56 + [Option("matcha-vocoder", Required = false, HelpText = "Path to the vocoder model of Matcha")]
  57 + public string Vocoder { get; set; } = "";
  58 +
53 [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")] 59 [Option("sid", Required = false, Default = 0, HelpText = "Speaker ID")]
54 public int SpeakerId { get; set; } = 0; 60 public int SpeakerId { get; set; } = 0;
55 61
@@ -73,6 +79,42 @@ class OfflineTtsDemo @@ -73,6 +79,42 @@ class OfflineTtsDemo
73 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) 79 private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs)
74 { 80 {
75 var usage = @" 81 var usage = @"
  82 +# matcha-icefall-zh-baker
  83 +
  84 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  85 +tar xvf matcha-icefall-zh-baker.tar.bz2
  86 +rm matcha-icefall-zh-baker.tar.bz2
  87 +
  88 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  89 +
  90 +dotnet run \
  91 + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  92 + --matcha-vocoder=./hifigan_v2.onnx \
  93 + --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  94 + --tokens=./matcha-icefall-zh-baker/tokens.txt \
  95 + --dict-dir=./matcha-icefall-zh-baker/dict \
  96 + --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  97 + --debug=1 \
  98 + --output-filename=./matcha-zh.wav \
  99 +  --text='某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。'
  100 +
  101 +# matcha-icefall-en_US-ljspeech
  102 +
  103 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  104 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  105 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  106 +
  107 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  108 +
  109 +dotnet run \
  110 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  111 + --matcha-vocoder=./hifigan_v2.onnx \
  112 +  --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  113 + --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  114 + --debug=1 \
  115 +  --output-filename=./matcha-en.wav \
  116 + --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
  117 +
76 # vits-aishell3 118 # vits-aishell3
77 119
78 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 120 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
@@ -80,8 +122,8 @@ tar xvf vits-icefall-zh-aishell3.tar.bz2 @@ -80,8 +122,8 @@ tar xvf vits-icefall-zh-aishell3.tar.bz2
80 122
81 dotnet run \ 123 dotnet run \
82 --vits-model=./vits-icefall-zh-aishell3/model.onnx \ 124 --vits-model=./vits-icefall-zh-aishell3/model.onnx \
83 - --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \  
84 - --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \ 125 + --tokens=./vits-icefall-zh-aishell3/tokens.txt \
  126 + --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
85 --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \ 127 --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
86 --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \ 128 --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
87 --sid=66 \ 129 --sid=66 \
@@ -96,8 +138,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2 @@ -96,8 +138,8 @@ tar xf vits-piper-en_US-amy-low.tar.bz2
96 138
97 dotnet run \ 139 dotnet run \
98 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \ 140 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
99 - --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \  
100 - --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \ 141 + --tokens=./vits-piper-en_US-amy-low/tokens.txt \
  142 + --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
101 --debug=1 \ 143 --debug=1 \
102 --output-filename=./amy.wav \ 144 --output-filename=./amy.wav \
103 --text='This is a text to speech application in dotnet with Next Generation Kaldi' 145 --text='This is a text to speech application in dotnet with Next Generation Kaldi'
@@ -128,6 +170,16 @@ to download more models. @@ -128,6 +170,16 @@ to download more models.
128 config.Model.Vits.NoiseScale = options.NoiseScale; 170 config.Model.Vits.NoiseScale = options.NoiseScale;
129 config.Model.Vits.NoiseScaleW = options.NoiseScaleW; 171 config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
130 config.Model.Vits.LengthScale = options.LengthScale; 172 config.Model.Vits.LengthScale = options.LengthScale;
  173 +
  174 + config.Model.Matcha.AcousticModel = options.AcousticModel;
  175 + config.Model.Matcha.Vocoder = options.Vocoder;
  176 + config.Model.Matcha.Lexicon = options.Lexicon;
  177 + config.Model.Matcha.Tokens = options.Tokens;
  178 + config.Model.Matcha.DataDir = options.DataDir;
  179 + config.Model.Matcha.DictDir = options.DictDir;
  180 + config.Model.Matcha.NoiseScale = options.NoiseScale;
  181 + config.Model.Matcha.LengthScale = options.LengthScale;
  182 +
131 config.Model.NumThreads = 1; 183 config.Model.NumThreads = 1;
132 config.Model.Debug = options.Debug; 184 config.Model.Debug = options.Debug;
133 config.Model.Provider = "cpu"; 185 config.Model.Provider = "cpu";
@@ -8,8 +8,8 @@ fi @@ -8,8 +8,8 @@ fi
8 8
9 dotnet run \ 9 dotnet run \
10 --vits-model=./vits-icefall-zh-aishell3/model.onnx \ 10 --vits-model=./vits-icefall-zh-aishell3/model.onnx \
11 - --vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \  
12 - --vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \ 11 + --tokens=./vits-icefall-zh-aishell3/tokens.txt \
  12 + --lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
13 --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \ 13 --tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
14 --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \ 14 --tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
15 --sid=66 \ 15 --sid=66 \
@@ -8,10 +8,10 @@ fi @@ -8,10 +8,10 @@ fi
8 8
9 dotnet run \ 9 dotnet run \
10 --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \ 10 --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
11 - --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \  
12 - --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \ 11 + --tokens=./vits-zh-hf-fanchen-C/tokens.txt \
  12 + --lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
13 --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \ 13 --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
14 - --vits-dict-dir=./vits-zh-hf-fanchen-C/dict \ 14 + --dict-dir=./vits-zh-hf-fanchen-C/dict \
15 --sid=100 \ 15 --sid=100 \
16 --debug=1 \ 16 --debug=1 \
17 --output-filename=./fanchen-100.wav \ 17 --output-filename=./fanchen-100.wav \
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +
  5 +# please visit
  6 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
  7 +# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
  8 +# to download more models
  9 +if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  10 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  11 + tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  12 + rm matcha-icefall-en_US-ljspeech.tar.bz2
  13 +fi
  14 +
  15 +if [ ! -f ./hifigan_v2.onnx ]; then
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  17 +fi
  18 +
  19 +dotnet run \
  20 + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
  21 + --matcha-vocoder=./hifigan_v2.onnx \
  22 + --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
  23 + --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
  24 + --debug=1 \
  25 + --output-filename=./matcha-en.wav \
  26 + --text='Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
  1 +#!/usr/bin/env bash
  2 +set -ex
  3 +
  4 +# please visit
  5 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
  6 +# to download more models
  7 +if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  8 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  9 + tar xvf matcha-icefall-zh-baker.tar.bz2
  10 + rm matcha-icefall-zh-baker.tar.bz2
  11 +fi
  12 +
  13 +if [ ! -f ./hifigan_v2.onnx ]; then
  14 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  15 +fi
  16 +
  17 +
  18 +dotnet run \
  19 + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
  20 + --matcha-vocoder=./hifigan_v2.onnx \
  21 + --lexicon=./matcha-icefall-zh-baker/lexicon.txt \
  22 + --tokens=./matcha-icefall-zh-baker/tokens.txt \
  23 + --dict-dir=./matcha-icefall-zh-baker/dict \
  24 + --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
  25 + --debug=1 \
  26 + --output-filename=./matcha-zh.wav \
  27 + --text="某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
@@ -10,8 +10,8 @@ fi @@ -10,8 +10,8 @@ fi
10 10
11 dotnet run \ 11 dotnet run \
12 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \ 12 --vits-model=./vits-piper-en_US-amy-low/en_US-amy-low.onnx \
13 - --vits-tokens=./vits-piper-en_US-amy-low/tokens.txt \  
14 - --vits-data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \ 13 + --tokens=./vits-piper-en_US-amy-low/tokens.txt \
  14 + --data-dir=./vits-piper-en_US-amy-low/espeak-ng-data \
15 --debug=1 \ 15 --debug=1 \
16 --output-filename=./amy.wav \ 16 --output-filename=./amy.wav \
17 --text="This is a text to speech application in dotnet with Next Generation Kaldi" 17 --text="This is a text to speech application in dotnet with Next Generation Kaldi"
@@ -42,9 +42,45 @@ node ./test-offline-speaker-diarization.js @@ -42,9 +42,45 @@ node ./test-offline-speaker-diarization.js
42 42
43 In the following, we demonstrate how to run text-to-speech. 43 In the following, we demonstrate how to run text-to-speech.
44 44
45 -## ./test-offline-tts-en.js 45 +## ./test-offline-tts-matcha-zh.js
46 46
47 -[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use 47 +[./test-offline-tts-matcha-zh.js](./test-offline-tts-matcha-zh.js) shows how to use
  48 +[matcha-icefall-zh-baker](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)
  49 +for text-to-speech.
  50 +
  51 +You can use the following command to run it:
  52 +
  53 +```bash
  54 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  55 +tar xvf matcha-icefall-zh-baker.tar.bz2
  56 +rm matcha-icefall-zh-baker.tar.bz2
  57 +
  58 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  59 +
  60 +node ./test-offline-tts-matcha-zh.js
  61 +```
  62 +
  63 +## ./test-offline-tts-matcha-en.js
  64 +
  65 +[./test-offline-tts-matcha-en.js](./test-offline-tts-matcha-en.js) shows how to use
  66 +[matcha-icefall-en_US-ljspeech](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)
  67 +for text-to-speech.
  68 +
  69 +You can use the following command to run it:
  70 +
  71 +```bash
  72 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  73 +tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
  74 +rm matcha-icefall-en_US-ljspeech.tar.bz2
  75 +
  76 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
  77 +
  78 +node ./test-offline-tts-matcha-en.js
  79 +```
  80 +
  81 +## ./test-offline-tts-vits-en.js
  82 +
  83 +[./test-offline-tts-vits-en.js](./test-offline-tts-vits-en.js) shows how to use
48 [vits-piper-en_US-amy-low.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2) 84 [vits-piper-en_US-amy-low.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2)
49 for text-to-speech. 85 for text-to-speech.
50 86
@@ -53,12 +89,12 @@ You can use the following command to run it: @@ -53,12 +89,12 @@ You can use the following command to run it:
53 ```bash 89 ```bash
54 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 90 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
55 tar xvf vits-piper-en_US-amy-low.tar.bz2 91 tar xvf vits-piper-en_US-amy-low.tar.bz2
56 -node ./test-offline-tts-en.js 92 +node ./test-offline-tts-vits-en.js
57 ``` 93 ```
58 94
59 -## ./test-offline-tts-zh.js 95 +## ./test-offline-tts-vits-zh.js
60 96
61 -[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use 97 +[./test-offline-tts-vits-zh.js](./test-offline-tts-vits-zh.js) shows how to use
62 a VITS pretrained model 98 a VITS pretrained model
63 [aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3) 99 [aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
64 for text-to-speech. 100 for text-to-speech.
@@ -68,7 +104,7 @@ You can use the following command to run it: @@ -68,7 +104,7 @@ You can use the following command to run it:
68 ```bash 104 ```bash
69 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2 105 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
70 tar xvf vits-icefall-zh-aishell3.tar.bz2 106 tar xvf vits-icefall-zh-aishell3.tar.bz2
71 -node ./test-offline-tts-zh.js 107 +node ./test-offline-tts-vits-zh.js
72 ``` 108 ```
73 109
74 # Speech-to-text 110 # Speech-to-text
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +
  5 +function createOfflineTts() {
  6 + let offlineTtsMatchaModelConfig = {
  7 + acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
  8 + vocoder: './hifigan_v2.onnx',
  9 + lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
  10 + tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
  11 + dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',
  12 +
  13 + noiseScale: 0.667,
  14 + lengthScale: 1.0,
  15 + };
  16 + let offlineTtsModelConfig = {
  17 + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig,
  18 + numThreads: 1,
  19 + debug: 1,
  20 + provider: 'cpu',
  21 + };
  22 +
  23 + let offlineTtsConfig = {
  24 + offlineTtsModelConfig: offlineTtsModelConfig,
  25 + maxNumSentences: 1,
  26 + };
  27 +
  28 + return sherpa_onnx.createOfflineTts(offlineTtsConfig);
  29 +}
  30 +
  31 +const tts = createOfflineTts();
  32 +const speakerId = 0;
  33 +const speed = 1.0;
  34 +const text =
  35 + 'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'
  36 +
  37 +const audio = tts.generate({text: text, sid: speakerId, speed: speed});
  38 +tts.save('./test-matcha-en.wav', audio);
  39 +console.log('Saved to test-matcha-en.wav successfully.');
  40 +tts.free();
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const sherpa_onnx = require('sherpa-onnx');
  4 +
  5 +function createOfflineTts() {
  6 + let offlineTtsMatchaModelConfig = {
  7 + acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
  8 + vocoder: './hifigan_v2.onnx',
  9 + lexicon: './matcha-icefall-zh-baker/lexicon.txt',
  10 + tokens: './matcha-icefall-zh-baker/tokens.txt',
  11 + dictDir: './matcha-icefall-zh-baker/dict',
  12 + noiseScale: 0.667,
  13 + lengthScale: 1.0,
  14 + };
  15 + let offlineTtsModelConfig = {
  16 + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig,
  17 + numThreads: 1,
  18 + debug: 1,
  19 + provider: 'cpu',
  20 + };
  21 +
  22 + let offlineTtsConfig = {
  23 + offlineTtsModelConfig: offlineTtsModelConfig,
  24 + maxNumSentences: 1,
  25 + ruleFsts:
  26 + './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst',
  27 + };
  28 +
  29 + return sherpa_onnx.createOfflineTts(offlineTtsConfig);
  30 +}
  31 +
  32 +const tts = createOfflineTts();
  33 +const speakerId = 0;
  34 +const speed = 1.0;
  35 +const text =
  36 + '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。'
  37 +
  38 +const audio = tts.generate({text: text, sid: speakerId, speed: speed});
  39 +tts.save('./test-matcha-zh.wav', audio);
  40 +console.log('Saved to test-matcha-zh.wav successfully.');
  41 +tts.free();
@@ -37,7 +37,7 @@ const audio = tts.generate({ @@ -37,7 +37,7 @@ const audio = tts.generate({
37 speed: speed 37 speed: speed
38 }); 38 });
39 39
40 -tts.save('./test-en.wav', audio);  
41 -console.log('Saved to test-en.wav successfully.'); 40 +tts.save('./test-vits-en.wav', audio);
  41 +console.log('Saved to test-vits-en.wav successfully.');
42 42
43 tts.free(); 43 tts.free();
@@ -34,6 +34,6 @@ const speakerId = 66; @@ -34,6 +34,6 @@ const speakerId = 66;
34 const speed = 1.0; 34 const speed = 1.0;
35 const audio = tts.generate( 35 const audio = tts.generate(
36 {text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed}); 36 {text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed});
37 -tts.save('./test-zh.wav', audio);  
38 -console.log('Saved to test-zh.wav successfully.'); 37 +tts.save('./test-vits-zh.wav', audio);
  38 +console.log('Saved to test-vits-zh.wav successfully.');
39 tts.free(); 39 tts.free();
  1 +/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + [StructLayout(LayoutKind.Sequential)]
  8 + public struct OfflineTtsMatchaModelConfig
  9 + {
  10 + public OfflineTtsMatchaModelConfig()
  11 + {
  12 + AcousticModel = "";
  13 + Vocoder = "";
  14 + Lexicon = "";
  15 + Tokens = "";
  16 + DataDir = "";
  17 +
  18 + NoiseScale = 0.667F;
  19 + LengthScale = 1.0F;
  20 +
  21 + DictDir = "";
  22 + }
  23 + [MarshalAs(UnmanagedType.LPStr)]
  24 + public string AcousticModel;
  25 +
  26 + [MarshalAs(UnmanagedType.LPStr)]
  27 + public string Vocoder;
  28 +
  29 + [MarshalAs(UnmanagedType.LPStr)]
  30 + public string Lexicon;
  31 +
  32 + [MarshalAs(UnmanagedType.LPStr)]
  33 + public string Tokens;
  34 +
  35 + [MarshalAs(UnmanagedType.LPStr)]
  36 + public string DataDir;
  37 +
  38 + public float NoiseScale;
  39 + public float LengthScale;
  40 +
  41 + [MarshalAs(UnmanagedType.LPStr)]
  42 + public string DictDir;
  43 + }
  44 +}
@@ -11,6 +11,7 @@ namespace SherpaOnnx @@ -11,6 +11,7 @@ namespace SherpaOnnx
11 public OfflineTtsModelConfig() 11 public OfflineTtsModelConfig()
12 { 12 {
13 Vits = new OfflineTtsVitsModelConfig(); 13 Vits = new OfflineTtsVitsModelConfig();
  14 + Matcha = new OfflineTtsMatchaModelConfig();
14 NumThreads = 1; 15 NumThreads = 1;
15 Debug = 0; 16 Debug = 0;
16 Provider = "cpu"; 17 Provider = "cpu";
@@ -21,5 +22,7 @@ namespace SherpaOnnx @@ -21,5 +22,7 @@ namespace SherpaOnnx
21 public int Debug; 22 public int Debug;
22 [MarshalAs(UnmanagedType.LPStr)] 23 [MarshalAs(UnmanagedType.LPStr)]
23 public string Provider; 24 public string Provider;
  25 +
  26 + public OfflineTtsMatchaModelConfig Matcha;
24 } 27 }
25 } 28 }
1 <Project Sdk="Microsoft.NET.Sdk"> 1 <Project Sdk="Microsoft.NET.Sdk">
2 2
3 <PropertyGroup> 3 <PropertyGroup>
4 - <TargetFramework>.net6</TargetFramework> 4 + <TargetFramework>net8.0</TargetFramework>
5 <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources> 5 <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
6 </PropertyGroup> 6 </PropertyGroup>
7 7
@@ -4,7 +4,7 @@ @@ -4,7 +4,7 @@
4 <PackageReadmeFile>README.md</PackageReadmeFile> 4 <PackageReadmeFile>README.md</PackageReadmeFile>
5 <OutputType>Library</OutputType> 5 <OutputType>Library</OutputType>
6 <LangVersion>10.0</LangVersion> 6 <LangVersion>10.0</LangVersion>
7 - <TargetFrameworks>net6.0;net45;net40;net35;net20;netstandard2.0</TargetFrameworks> 7 + <TargetFrameworks>net8.0;net7.0;net6.0;net45;net40;net35;net20;netstandard2.0</TargetFrameworks>
8 <RuntimeIdentifiers>linux-x64;linux-arm64;osx-x64;osx-arm64;win-x64;win-x86;win-arm64</RuntimeIdentifiers> 8 <RuntimeIdentifiers>linux-x64;linux-arm64;osx-x64;osx-arm64;win-x64;win-x86;win-arm64</RuntimeIdentifiers>
9 <AllowUnsafeBlocks>true</AllowUnsafeBlocks> 9 <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
10 <AssemblyName>sherpa-onnx</AssemblyName> 10 <AssemblyName>sherpa-onnx</AssemblyName>
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression> 3 <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression>
4 <PackageReadmeFile>README.md</PackageReadmeFile> 4 <PackageReadmeFile>README.md</PackageReadmeFile>
5 <OutputType>Library</OutputType> 5 <OutputType>Library</OutputType>
6 - <TargetFrameworks>net6.0;net45;net40;net35;net20;netstandard2.0</TargetFrameworks> 6 + <TargetFrameworks>net8.0;net7.0;net6.0;net45;net40;net35;net20;netstandard2.0</TargetFrameworks>
7 <RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier> 7 <RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier>
8 <AssemblyName>sherpa-onnx</AssemblyName> 8 <AssemblyName>sherpa-onnx</AssemblyName>
9 <Version>{{ version }}</Version> 9 <Version>{{ version }}</Version>
@@ -8,6 +8,10 @@ function freeConfig(config, Module) { @@ -8,6 +8,10 @@ function freeConfig(config, Module) {
8 freeConfig(config.config, Module) 8 freeConfig(config.config, Module)
9 } 9 }
10 10
  11 + if ('config2' in config) {
  12 + freeConfig(config.config2, Module)
  13 + }
  14 +
11 Module._free(config.ptr); 15 Module._free(config.ptr);
12 } 16 }
13 17
@@ -66,11 +70,103 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) { @@ -66,11 +70,103 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
66 } 70 }
67 } 71 }
68 72
  73 +function initSherpaOnnxOfflineTtsMatchaModelConfig(config, Module) {
  74 + const acousticModelLen = Module.lengthBytesUTF8(config.acousticModel) + 1;
  75 + const vocoderLen = Module.lengthBytesUTF8(config.vocoder) + 1;
  76 + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
  77 + const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
  78 + const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
  79 + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
  80 +
  81 + const n = acousticModelLen + vocoderLen + lexiconLen + tokensLen +
  82 + dataDirLen + dictDirLen;
  83 +
  84 + const buffer = Module._malloc(n);
  85 +
  86 + const len = 8 * 4;
  87 + const ptr = Module._malloc(len);
  88 +
  89 + let offset = 0;
  90 + Module.stringToUTF8(
  91 + config.acousticModel || '', buffer + offset, acousticModelLen);
  92 + offset += acousticModelLen;
  93 +
  94 + Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen);
  95 + offset += vocoderLen;
  96 +
  97 + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
  98 + offset += lexiconLen;
  99 +
  100 + Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
  101 + offset += tokensLen;
  102 +
  103 + Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
  104 + offset += dataDirLen;
  105 +
  106 + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
  107 + offset += dictDirLen;
  108 +
  109 + offset = 0;
  110 + Module.setValue(ptr, buffer + offset, 'i8*');
  111 + offset += acousticModelLen;
  112 +
  113 + Module.setValue(ptr + 4, buffer + offset, 'i8*');
  114 + offset += vocoderLen;
  115 +
  116 + Module.setValue(ptr + 8, buffer + offset, 'i8*');
  117 + offset += lexiconLen;
  118 +
  119 + Module.setValue(ptr + 12, buffer + offset, 'i8*');
  120 + offset += tokensLen;
  121 +
  122 + Module.setValue(ptr + 16, buffer + offset, 'i8*');
  123 + offset += dataDirLen;
  124 +
  125 + Module.setValue(ptr + 20, config.noiseScale || 0.667, 'float');
  126 + Module.setValue(ptr + 24, config.lengthScale || 1.0, 'float');
  127 + Module.setValue(ptr + 28, buffer + offset, 'i8*');
  128 + offset += dictDirLen;
  129 +
  130 + return {
  131 + buffer: buffer, ptr: ptr, len: len,
  132 + }
  133 +}
  134 +
69 function initSherpaOnnxOfflineTtsModelConfig(config, Module) { 135 function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
  136 + if (!('offlineTtsVitsModelConfig' in config)) {
  137 + config.offlineTtsVitsModelConfig = {
  138 + model: '',
  139 + lexicon: '',
  140 + tokens: '',
  141 + noiseScale: 0.667,
  142 + noiseScaleW: 0.8,
  143 + lengthScale: 1.0,
  144 + dataDir: '',
  145 + dictDir: '',
  146 + };
  147 + }
  148 +
  149 + if (!('offlineTtsMatchaModelConfig' in config)) {
  150 + config.offlineTtsMatchaModelConfig = {
  151 + acousticModel: '',
  152 + vocoder: '',
  153 + lexicon: '',
  154 + tokens: '',
  155 + noiseScale: 0.667,
  156 + lengthScale: 1.0,
  157 + dataDir: '',
  158 + dictDir: '',
  159 + };
  160 + }
  161 +
  162 +
70 const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig( 163 const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig(
71 config.offlineTtsVitsModelConfig, Module); 164 config.offlineTtsVitsModelConfig, Module);
72 165
73 - const len = vitsModelConfig.len + 3 * 4; 166 + const matchaModelConfig = initSherpaOnnxOfflineTtsMatchaModelConfig(
  167 + config.offlineTtsMatchaModelConfig, Module);
  168 +
  169 + const len = vitsModelConfig.len + matchaModelConfig.len + 3 * 4;
74 const ptr = Module._malloc(len); 170 const ptr = Module._malloc(len);
75 171
76 let offset = 0; 172 let offset = 0;
@@ -87,9 +183,14 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { @@ -87,9 +183,14 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
87 const buffer = Module._malloc(providerLen); 183 const buffer = Module._malloc(providerLen);
88 Module.stringToUTF8(config.provider, buffer, providerLen); 184 Module.stringToUTF8(config.provider, buffer, providerLen);
89 Module.setValue(ptr + offset, buffer, 'i8*'); 185 Module.setValue(ptr + offset, buffer, 'i8*');
  186 + offset += 4;
  187 +
  188 + Module._CopyHeap(matchaModelConfig.ptr, matchaModelConfig.len, ptr + offset);
  189 + offset += matchaModelConfig.len;
90 190
91 return { 191 return {
92 buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, 192 buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig,
  193 + config2: matchaModelConfig
93 } 194 }
94 } 195 }
95 196
@@ -195,12 +296,26 @@ function createOfflineTts(Module, myConfig) { @@ -195,12 +296,26 @@ function createOfflineTts(Module, myConfig) {
195 noiseScaleW: 0.8, 296 noiseScaleW: 0.8,
196 lengthScale: 1.0, 297 lengthScale: 1.0,
197 }; 298 };
  299 +
  300 + const offlineTtsMatchaModelConfig = {
  301 + acousticModel: '',
  302 + vocoder: '',
  303 + lexicon: '',
  304 + tokens: '',
  305 + dataDir: '',
  306 + dictDir: '',
  307 + noiseScale: 0.667,
  308 + lengthScale: 1.0,
  309 + };
  310 +
198 const offlineTtsModelConfig = { 311 const offlineTtsModelConfig = {
199 offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, 312 offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
  313 + offlineTtsMatchaModelConfig: offlineTtsMatchaModelConfig,
200 numThreads: 1, 314 numThreads: 1,
201 debug: 1, 315 debug: 1,
202 provider: 'cpu', 316 provider: 'cpu',
203 }; 317 };
  318 +
204 let offlineTtsConfig = { 319 let offlineTtsConfig = {
205 offlineTtsModelConfig: offlineTtsModelConfig, 320 offlineTtsModelConfig: offlineTtsModelConfig,
206 ruleFsts: '', 321 ruleFsts: '',
@@ -14,8 +14,10 @@ @@ -14,8 +14,10 @@
14 extern "C" { 14 extern "C" {
15 15
16 static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, ""); 16 static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, "");
  17 +static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, "");
17 static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == 18 static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
18 - sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4, 19 + sizeof(SherpaOnnxOfflineTtsVitsModelConfig) +
  20 + sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) + 3 * 4,
19 ""); 21 "");
20 static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == 22 static_assert(sizeof(SherpaOnnxOfflineTtsConfig) ==
21 sizeof(SherpaOnnxOfflineTtsModelConfig) + 3 * 4, 23 sizeof(SherpaOnnxOfflineTtsModelConfig) + 3 * 4,
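The wasm glue above serializes this struct by hand: five string pointers at byte offsets 0 through 16, the two floats at 20 and 24, and the trailing dict-dir pointer at 28, which is the 8 * 4 bytes checked by the static_assert here. The following is a hypothetical C# helper (not part of this patch) that prints the field order of the managed `OfflineTtsMatchaModelConfig` mirror added earlier in the diff; note that `Marshal.OffsetOf` reports host-ABI offsets, not the wasm32 ones, so only the ordering is meaningful.

```csharp
// Hypothetical sanity check: the managed OfflineTtsMatchaModelConfig must keep
// the field order the wasm/C side assumes:
// AcousticModel, Vocoder, Lexicon, Tokens, DataDir, NoiseScale, LengthScale, DictDir.
using System;
using System.Runtime.InteropServices;
using SherpaOnnx;

class MatchaLayoutCheck
{
    static void Main()
    {
        var t = typeof(OfflineTtsMatchaModelConfig);
        string[] expectedOrder =
        {
            "AcousticModel", "Vocoder", "Lexicon", "Tokens",
            "DataDir", "NoiseScale", "LengthScale", "DictDir",
        };
        foreach (var name in expectedOrder)
        {
            // On wasm32 the corresponding offsets are 0, 4, 8, 12, 16, 20, 24, 28;
            // here we only verify that the declaration order matches.
            Console.WriteLine($"{name,-14} offset = {Marshal.OffsetOf(t, name)}");
        }
    }
}
```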
@@ -24,6 +26,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == @@ -24,6 +26,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsConfig) ==
24 void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { 26 void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
25 auto tts_model_config = &tts_config->model; 27 auto tts_model_config = &tts_config->model;
26 auto vits_model_config = &tts_model_config->vits; 28 auto vits_model_config = &tts_model_config->vits;
  29 + auto matcha_model_config = &tts_model_config->matcha;
27 fprintf(stdout, "----------vits model config----------\n"); 30 fprintf(stdout, "----------vits model config----------\n");
28 fprintf(stdout, "model: %s\n", vits_model_config->model); 31 fprintf(stdout, "model: %s\n", vits_model_config->model);
29 fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon); 32 fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
@@ -34,6 +37,16 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { @@ -34,6 +37,16 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
34 fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale); 37 fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
35 fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir); 38 fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir);
36 39
  40 + fprintf(stdout, "----------matcha model config----------\n");
  41 + fprintf(stdout, "acoustic_model: %s\n", matcha_model_config->acoustic_model);
  42 + fprintf(stdout, "vocoder: %s\n", matcha_model_config->vocoder);
  43 + fprintf(stdout, "lexicon: %s\n", matcha_model_config->lexicon);
  44 + fprintf(stdout, "tokens: %s\n", matcha_model_config->tokens);
  45 + fprintf(stdout, "data_dir: %s\n", matcha_model_config->data_dir);
  46 + fprintf(stdout, "noise scale: %.3f\n", matcha_model_config->noise_scale);
  47 + fprintf(stdout, "length scale: %.3f\n", matcha_model_config->length_scale);
  48 + fprintf(stdout, "dict_dir: %s\n", matcha_model_config->dict_dir);
  49 +
37 fprintf(stdout, "----------tts model config----------\n"); 50 fprintf(stdout, "----------tts model config----------\n");
38 fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); 51 fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
39 fprintf(stdout, "debug: %d\n", tts_model_config->debug); 52 fprintf(stdout, "debug: %d\n", tts_model_config->debug);