Committed by
GitHub
Add Pascal API for MatchaTTS models. (#1686)
正在显示
12 个修改的文件
包含
875 行增加
和
3 行删除
| @@ -152,6 +152,19 @@ jobs: | @@ -152,6 +152,19 @@ jobs: | ||
| 152 | 152 | ||
| 153 | ./run-piper.sh | 153 | ./run-piper.sh |
| 154 | rm -rf vits-piper-* | 154 | rm -rf vits-piper-* |
| 155 | + rm piper | ||
| 156 | + ls -lh | ||
| 157 | + echo "---" | ||
| 158 | + | ||
| 159 | + ./run-matcha-zh.sh | ||
| 160 | + rm -rf matcha-icefall-* | ||
| 161 | + rm matcha-zh | ||
| 162 | + ls -lh | ||
| 163 | + echo "---" | ||
| 164 | + | ||
| 165 | + ./run-matcha-en.sh | ||
| 166 | + rm -rf matcha-icefall-* | ||
| 167 | + rm matcha-en | ||
| 155 | ls -lh | 168 | ls -lh |
| 156 | echo "---" | 169 | echo "---" |
| 157 | 170 |
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program matcha_en_playback; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with MatchaTTS models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +Note that it plays the audio back as it is still generating. | ||
| 10 | +} | ||
| 11 | + | ||
| 12 | +{$mode objfpc} | ||
| 13 | + | ||
| 14 | +uses | ||
| 15 | + {$ifdef unix} | ||
| 16 | + cthreads, | ||
| 17 | + {$endif} | ||
| 18 | + SysUtils, | ||
| 19 | + dos, | ||
| 20 | + ctypes, | ||
| 21 | + portaudio, | ||
| 22 | + sherpa_onnx; | ||
| 23 | + | ||
| 24 | +var | ||
| 25 | + CriticalSection: TRTLCriticalSection; | ||
| 26 | + | ||
| 27 | + Tts: TSherpaOnnxOfflineTts; | ||
| 28 | + Audio: TSherpaOnnxGeneratedAudio; | ||
| 29 | + Resampler: TSherpaOnnxLinearResampler; | ||
| 30 | + | ||
| 31 | + Text: AnsiString; | ||
| 32 | + Speed: Single = 1.0; {Use a larger value to speak faster} | ||
| 33 | + SpeakerId: Integer = 0; | ||
| 34 | + Buffer: TSherpaOnnxCircularBuffer; | ||
| 35 | + FinishedGeneration: Boolean = False; | ||
| 36 | + FinishedPlaying: Boolean = False; | ||
| 37 | + | ||
| 38 | + Version: String; | ||
| 39 | + EnvStr: String; | ||
| 40 | + Status: Integer; | ||
| 41 | + NumDevices: Integer; | ||
| 42 | + DeviceIndex: Integer; | ||
| 43 | + DeviceInfo: PPaDeviceInfo; | ||
| 44 | + | ||
| 45 | + { If you get EDivByZero: Division by zero error, please change the sample rate | ||
| 46 | + to the one supported by your output device. | ||
| 47 | + } | ||
| 48 | + DeviceSampleRate: Integer = 48000; | ||
| 49 | + I: Integer; | ||
| 50 | + Param: TPaStreamParameters; | ||
| 51 | + Stream: PPaStream; | ||
| 52 | + Wave: TSherpaOnnxWave; | ||
| 53 | + | ||
{ Callback handed to Tts.Generate; it receives each newly generated chunk of
  N samples.

  The chunk is appended to the shared playback Buffer, passing through
  Resampler first when one has been created. CriticalSection guards Buffer
  because PlayCallback reads from and pops the same buffer.

  Arg is unused. Always returns 1 (continue generating); 0 would stop. }
function GenerateCallback(
  Samples: pcfloat; N: cint32;
  Arg: Pointer): cint; cdecl;
begin
  Result := 1;

  EnterCriticalSection(CriticalSection);
  try
    if Resampler = nil then
      Buffer.Push(Samples, N)
    else
      Buffer.Push(Resampler.Resample(Samples, N, False));
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
| 71 | + | ||
{ PortAudio stream callback: fills `output` with frameCount float samples
  taken from the shared Buffer.

  When Buffer holds fewer than frameCount samples, whatever is available is
  played and the remainder of the output is zero (silence).

  Returns paContinue while samples remain or generation is still running;
  otherwise sets FinishedPlaying and returns paComplete.

  input, timeInfo, statusFlags and userData are unused. }
function PlayCallback(
  input: Pointer; output: Pointer;
  frameCount: culong;
  timeInfo: PPaStreamCallbackTimeInfo;
  statusFlags: TPaStreamCallbackFlags;
  userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  NumFrames: Integer;
  NumAvailable: Integer;
  I: Integer;
begin
  { Convert once to a signed value: comparing the unsigned frameCount with the
    buffer size mixes signedness, and "frameCount - 1" would wrap around if
    frameCount were ever 0. }
  NumFrames := Integer(frameCount);

  EnterCriticalSection(CriticalSection);
  try
    NumAvailable := Buffer.Size;
    if NumAvailable > NumFrames then
      NumAvailable := NumFrames;

    if NumAvailable > 0 then
    begin
      Samples := Buffer.Get(Buffer.Head, NumAvailable);
      Buffer.Pop(NumAvailable);
    end;

    { SetLength zero-fills the elements it adds to a dynamic array, so any
      shortfall is played back as silence. }
    SetLength(Samples, NumFrames);

    for I := 0 to NumFrames - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
    begin
      Result := paComplete;
      FinishedPlaying := True;
    end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
| 112 | + | ||
{ Creates the offline TTS engine for the English MatchaTTS model
  (matcha-icefall-en_US-ljspeech) with the hifigan_v2 vocoder.

  All model paths are relative to the current working directory.
  The caller is responsible for freeing the returned object. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.MaxNumSentences := 1;

  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
    DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
| 127 | + | ||
{ Main program.

  1. Creates the TTS engine, plus a resampler when the model sample rate
     differs from DeviceSampleRate.
  2. Initializes PortAudio, selects the output device and opens a mono
     float32 output stream driven by PlayCallback.
  3. Generates speech; GenerateCallback feeds each chunk into Buffer while
     PlayCallback plays it back.
  4. Saves the complete audio to ./matcha-en-playback.wav, waits for playback
     to finish and releases all resources. }
begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default output device found');
    Pa_Terminate;
    Exit;
  end;

  { Despite its name, this variable selects the OUTPUT device here; the name
    is kept so existing setups continue to work. }
  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  { An out-of-range index would make Pa_GetDeviceInfo return nil, which is
    dereferenced below. }
  if (DeviceIndex < 0) or (DeviceIndex >= NumDevices) then
  begin
    WriteLn('Invalid device index ', DeviceIndex, '. Number of devices: ', NumDevices);
    Pa_Terminate;
    Exit;
  end;

  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    if I = DeviceIndex then
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  DeviceInfo := Pa_GetDeviceInfo(DeviceIndex);

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', DeviceInfo^.Name);
  WriteLn(' Max output channels ', DeviceInfo^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  Param.SuggestedLatency := DeviceInfo^.DefaultHighOutputLatency;
  Param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);

  { The critical section must exist before any callback can possibly run. }
  InitCriticalSection(CriticalSection);

  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(Stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  Status := Pa_StartStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    Pa_CloseStream(Stream);
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  Audio := Tts.Generate(Text, SpeakerId, Speed,
    PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./matcha-en-playback.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-en-playback.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
  {TODO(fangjun): Use an event to indicate the play is finished}

  { Close the stream BEFORE destroying the critical section: PlayCallback sets
    FinishedPlaying while it still holds the lock, so the callback may not
    have left the critical section yet when the loop above exits. }
  Status := Pa_CloseStream(Stream);
  if Status <> paNoError then
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);
  FreeAndNil(Buffer);

  Status := Pa_Terminate;
  if Status <> paNoError then
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
end.
| 239 | + |
pascal-api-examples/tts/matcha-en.pas
0 → 100644
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program matcha_en; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with MatchaTTS models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +If you want to play it while it is generating, please see | ||
| 10 | +./matcha-en-playback.pas | ||
| 11 | +} | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + SysUtils, | ||
| 17 | + sherpa_onnx; | ||
| 18 | + | ||
{ Creates the offline TTS engine for the English MatchaTTS model
  (matcha-icefall-en_US-ljspeech) with the hifigan_v2 vocoder.

  All model paths are relative to the current working directory.
  The caller is responsible for freeing the returned object. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.MaxNumSentences := 1;

  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt';
    DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
| 33 | + | ||
var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;

{ Main program: synthesizes one English sentence and saves the result to
  ./matcha-en.wav. }
begin
  Tts := GetOfflineTts;
  try
    WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

    Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

    Audio := Tts.Generate(Text, SpeakerId, Speed);
    SherpaOnnxWriteWave('./matcha-en.wav', Audio.Samples, Audio.SampleRate);
    WriteLn('Saved to ./matcha-en.wav');
  finally
    { Release the engine even if generation or saving raises. }
    FreeAndNil(Tts);
  end;
end.
| 55 | + |
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program matcha_zh_playback; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with MatchaTTS models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +Note that it plays the audio back as it is still generating. | ||
| 10 | +} | ||
| 11 | + | ||
| 12 | +{$mode objfpc} | ||
| 13 | + | ||
| 14 | +uses | ||
| 15 | + {$ifdef unix} | ||
| 16 | + cthreads, | ||
| 17 | + {$endif} | ||
| 18 | + SysUtils, | ||
| 19 | + dos, | ||
| 20 | + ctypes, | ||
| 21 | + portaudio, | ||
| 22 | + sherpa_onnx; | ||
| 23 | + | ||
| 24 | +var | ||
| 25 | + CriticalSection: TRTLCriticalSection; | ||
| 26 | + | ||
| 27 | + Tts: TSherpaOnnxOfflineTts; | ||
| 28 | + Audio: TSherpaOnnxGeneratedAudio; | ||
| 29 | + Resampler: TSherpaOnnxLinearResampler; | ||
| 30 | + | ||
| 31 | + Text: AnsiString; | ||
| 32 | + Speed: Single = 1.0; {Use a larger value to speak faster} | ||
| 33 | + SpeakerId: Integer = 0; | ||
| 34 | + Buffer: TSherpaOnnxCircularBuffer; | ||
| 35 | + FinishedGeneration: Boolean = False; | ||
| 36 | + FinishedPlaying: Boolean = False; | ||
| 37 | + | ||
| 38 | + Version: String; | ||
| 39 | + EnvStr: String; | ||
| 40 | + Status: Integer; | ||
| 41 | + NumDevices: Integer; | ||
| 42 | + DeviceIndex: Integer; | ||
| 43 | + DeviceInfo: PPaDeviceInfo; | ||
| 44 | + | ||
| 45 | + { If you get EDivByZero: Division by zero error, please change the sample rate | ||
| 46 | + to the one supported by your output device. | ||
| 47 | + } | ||
| 48 | + DeviceSampleRate: Integer = 48000; | ||
| 49 | + I: Integer; | ||
| 50 | + Param: TPaStreamParameters; | ||
| 51 | + Stream: PPaStream; | ||
| 52 | + Wave: TSherpaOnnxWave; | ||
| 53 | + | ||
{ Callback handed to Tts.Generate; it receives each newly generated chunk of
  N samples.

  The chunk is appended to the shared playback Buffer, passing through
  Resampler first when one has been created. CriticalSection guards Buffer
  because PlayCallback reads from and pops the same buffer.

  Arg is unused. Always returns 1 (continue generating); 0 would stop. }
function GenerateCallback(
  Samples: pcfloat; N: cint32;
  Arg: Pointer): cint; cdecl;
begin
  Result := 1;

  EnterCriticalSection(CriticalSection);
  try
    if Resampler = nil then
      Buffer.Push(Samples, N)
    else
      Buffer.Push(Resampler.Resample(Samples, N, False));
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
| 71 | + | ||
{ PortAudio stream callback: fills `output` with frameCount float samples
  taken from the shared Buffer.

  When Buffer holds fewer than frameCount samples, whatever is available is
  played and the remainder of the output is zero (silence).

  Returns paContinue while samples remain or generation is still running;
  otherwise sets FinishedPlaying and returns paComplete.

  input, timeInfo, statusFlags and userData are unused. }
function PlayCallback(
  input: Pointer; output: Pointer;
  frameCount: culong;
  timeInfo: PPaStreamCallbackTimeInfo;
  statusFlags: TPaStreamCallbackFlags;
  userData: Pointer ): cint; cdecl;
var
  Samples: TSherpaOnnxSamplesArray;
  NumFrames: Integer;
  NumAvailable: Integer;
  I: Integer;
begin
  { Convert once to a signed value: comparing the unsigned frameCount with the
    buffer size mixes signedness, and "frameCount - 1" would wrap around if
    frameCount were ever 0. }
  NumFrames := Integer(frameCount);

  EnterCriticalSection(CriticalSection);
  try
    NumAvailable := Buffer.Size;
    if NumAvailable > NumFrames then
      NumAvailable := NumFrames;

    if NumAvailable > 0 then
    begin
      Samples := Buffer.Get(Buffer.Head, NumAvailable);
      Buffer.Pop(NumAvailable);
    end;

    { SetLength zero-fills the elements it adds to a dynamic array, so any
      shortfall is played back as silence. }
    SetLength(Samples, NumFrames);

    for I := 0 to NumFrames - 1 do
      pcfloat(output)[I] := Samples[I];

    if (Buffer.Size > 0) or (not FinishedGeneration) then
      Result := paContinue
    else
    begin
      Result := paComplete;
      FinishedPlaying := True;
    end;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
| 112 | + | ||
{ Creates the offline TTS engine for the Chinese MatchaTTS model
  (matcha-icefall-zh-baker) with the hifigan_v2 vocoder.

  Besides the acoustic model, vocoder and tokens, this model is configured
  with a lexicon, a dictionary directory and rule FSTs (phone, date, number).

  All model paths are relative to the current working directory.
  The caller is responsible for freeing the returned object. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.MaxNumSentences := 1;
  Config.RuleFsts := './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst';

  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
    Tokens := './matcha-icefall-zh-baker/tokens.txt';
    DictDir := './matcha-icefall-zh-baker/dict';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
| 129 | + | ||
{ Main program.

  1. Creates the TTS engine, plus a resampler when the model sample rate
     differs from DeviceSampleRate.
  2. Initializes PortAudio, selects the output device and opens a mono
     float32 output stream driven by PlayCallback.
  3. Generates speech; GenerateCallback feeds each chunk into Buffer while
     PlayCallback plays it back.
  4. Saves the complete audio to ./matcha-zh-playback.wav, waits for playback
     to finish and releases all resources. }
begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default output device found');
    Pa_Terminate;
    Exit;
  end;

  { Despite its name, this variable selects the OUTPUT device here; the name
    is kept so existing setups continue to work. }
  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  { An out-of-range index would make Pa_GetDeviceInfo return nil, which is
    dereferenced below. }
  if (DeviceIndex < 0) or (DeviceIndex >= NumDevices) then
  begin
    WriteLn('Invalid device index ', DeviceIndex, '. Number of devices: ', NumDevices);
    Pa_Terminate;
    Exit;
  end;

  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    if I = DeviceIndex then
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  DeviceInfo := Pa_GetDeviceInfo(DeviceIndex);

  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', DeviceInfo^.Name);
  WriteLn(' Max output channels ', DeviceInfo^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  Param.SuggestedLatency := DeviceInfo^.DefaultHighOutputLatency;
  Param.HostApiSpecificStreamInfo := nil;

  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);

  { The critical section must exist before any callback can possibly run. }
  InitCriticalSection(CriticalSection);

  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(Stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  Status := Pa_StartStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    Pa_CloseStream(Stream);
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := '某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。';

  Audio := Tts.Generate(Text, SpeakerId, Speed,
    PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./matcha-zh-playback.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./matcha-zh-playback.wav');

  while not FinishedPlaying do
    Pa_Sleep(100);  {sleep for 0.1 second }
  {TODO(fangjun): Use an event to indicate the play is finished}

  { Close the stream BEFORE destroying the critical section: PlayCallback sets
    FinishedPlaying while it still holds the lock, so the callback may not
    have left the critical section yet when the loop above exits. }
  Status := Pa_CloseStream(Stream);
  if Status <> paNoError then
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);
  FreeAndNil(Buffer);

  Status := Pa_Terminate;
  if Status <> paNoError then
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
end.
| 241 | + |
pascal-api-examples/tts/matcha-zh.pas
0 → 100644
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program matcha_zh; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with MatchaTTS models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +If you want to play it while it is generating, please see | ||
| 10 | +./matcha-zh-playback.pas | ||
| 11 | +} | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + SysUtils, | ||
| 17 | + sherpa_onnx; | ||
| 18 | + | ||
{ Creates the offline TTS engine for the Chinese MatchaTTS model
  (matcha-icefall-zh-baker) with the hifigan_v2 vocoder.

  Besides the acoustic model, vocoder and tokens, this model is configured
  with a lexicon, a dictionary directory and rule FSTs (phone, date, number).

  All model paths are relative to the current working directory.
  The caller is responsible for freeing the returned object. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.MaxNumSentences := 1;
  Config.RuleFsts := './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst';

  Config.Model.NumThreads := 1;
  Config.Model.Debug := False;

  with Config.Model.Matcha do
  begin
    AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx';
    Vocoder := './hifigan_v2.onnx';
    Lexicon := './matcha-icefall-zh-baker/lexicon.txt';
    Tokens := './matcha-icefall-zh-baker/tokens.txt';
    DictDir := './matcha-icefall-zh-baker/dict';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
| 35 | + | ||
var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 0;

{ Main program: synthesizes one Chinese sentence and saves the result to
  ./matcha-zh.wav. }
begin
  Tts := GetOfflineTts;
  try
    WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

    Text := '某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。';

    Audio := Tts.Generate(Text, SpeakerId, Speed);
    SherpaOnnxWriteWave('./matcha-zh.wav', Audio.Samples, Audio.SampleRate);
    WriteLn('Saved to ./matcha-zh.wav');
  finally
    { Release the engine even if generation or saving raises. }
    FreeAndNil(Tts);
  end;
end.
| 57 | + |
| 1 | { Copyright (c) 2024 Xiaomi Corporation } | 1 | { Copyright (c) 2024 Xiaomi Corporation } |
| 2 | -program piper; | 2 | +program piper_playback; |
| 3 | { | 3 | { |
| 4 | This file shows how to use the text to speech API of sherpa-onnx | 4 | This file shows how to use the text to speech API of sherpa-onnx |
| 5 | with Piper models. | 5 | with Piper models. |
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries (if not already installed under
# build/install), downloads the English MatchaTTS model and the vocoder
# (if missing), then compiles and runs ./matcha-en-playback.pas.
#
# Set PORTAUDIO_LIB_DIR to the directory containing the portaudio library
# if it is not in the default Homebrew location used below.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Models, sources and binaries are addressed relative to this script, so work
# from its directory regardless of where the script was invoked.
cd "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS
fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  -Fl"${PORTAUDIO_LIB_DIR:-/usr/local/Cellar/portaudio/19.7.0/lib}" \
  ./matcha-en-playback.pas

# Append the previous value only when it is non-empty; a trailing ':' would
# add an empty entry, i.e. the current directory, to the library search path.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./matcha-en-playback
pascal-api-examples/tts/run-matcha-en.sh
0 → 100755
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries (if not already installed under
# build/install), downloads the English MatchaTTS model and the vocoder
# (if missing), then compiles and runs ./matcha-en.pas.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Models, sources and binaries are addressed relative to this script, so work
# from its directory regardless of where the script was invoked.
cd "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
  tar xf matcha-icefall-en_US-ljspeech.tar.bz2
  rm matcha-icefall-en_US-ljspeech.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./matcha-en.pas

# Append the previous value only when it is non-empty; a trailing ':' would
# add an empty entry, i.e. the current directory, to the library search path.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./matcha-en
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries (if not already installed under
# build/install), downloads the Chinese MatchaTTS model and the vocoder
# (if missing), then compiles and runs ./matcha-zh-playback.pas.
#
# Set PORTAUDIO_LIB_DIR to the directory containing the portaudio library
# if it is not in the default Homebrew location used below.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Models, sources and binaries are addressed relative to this script, so work
# from its directory regardless of where the script was invoked.
cd "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS
fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  -Fl"${PORTAUDIO_LIB_DIR:-/usr/local/Cellar/portaudio/19.7.0/lib}" \
  ./matcha-zh-playback.pas

# Append the previous value only when it is non-empty; a trailing ':' would
# add an empty entry, i.e. the current directory, to the library search path.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./matcha-zh-playback
pascal-api-examples/tts/run-matcha-zh.sh
0 → 100755
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared libraries (if not already installed under
# build/install), downloads the Chinese MatchaTTS model and the vocoder
# (if missing), then compiles and runs ./matcha-zh.pas.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
LIB_DIR="$SHERPA_ONNX_DIR/build/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Models, sources and binaries are addressed relative to this script, so work
# from its directory regardless of where the script was invoked.
cd "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$SHERPA_ONNX_DIR/build"
  pushd "$SHERPA_ONNX_DIR/build"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
  tar xvf matcha-icefall-zh-baker.tar.bz2
  rm matcha-icefall-zh-baker.tar.bz2
fi

if [ ! -f ./hifigan_v2.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$LIB_DIR" \
  ./matcha-zh.pas

# Append the previous value only when it is non-empty; a trailing ':' would
# add an empty entry, i.e. the current directory, to the library search path.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./matcha-zh
| @@ -62,11 +62,26 @@ type | @@ -62,11 +62,26 @@ type | ||
| 62 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig); | 62 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig); |
| 63 | end; | 63 | end; |
| 64 | 64 | ||
| 65 | + TSherpaOnnxOfflineTtsMatchaModelConfig = record | ||
| 66 | + AcousticModel: AnsiString; | ||
| 67 | + Vocoder: AnsiString; | ||
| 68 | + Lexicon: AnsiString; | ||
| 69 | + Tokens: AnsiString; | ||
| 70 | + DataDir: AnsiString; | ||
| 71 | + NoiseScale: Single; | ||
| 72 | + LengthScale: Single; | ||
| 73 | + DictDir: AnsiString; | ||
| 74 | + | ||
| 75 | + function ToString: AnsiString; | ||
| 76 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig); | ||
| 77 | + end; | ||
| 78 | + | ||
| 65 | TSherpaOnnxOfflineTtsModelConfig = record | 79 | TSherpaOnnxOfflineTtsModelConfig = record |
| 66 | Vits: TSherpaOnnxOfflineTtsVitsModelConfig; | 80 | Vits: TSherpaOnnxOfflineTtsVitsModelConfig; |
| 67 | NumThreads: Integer; | 81 | NumThreads: Integer; |
| 68 | Debug: Boolean; | 82 | Debug: Boolean; |
| 69 | Provider: AnsiString; | 83 | Provider: AnsiString; |
| 84 | + Matcha: TSherpaOnnxOfflineTtsMatchaModelConfig; | ||
| 70 | 85 | ||
| 71 | function ToString: AnsiString; | 86 | function ToString: AnsiString; |
| 72 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); | 87 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); |
| @@ -713,11 +728,23 @@ type | @@ -713,11 +728,23 @@ type | ||
| 713 | DictDir: PAnsiChar; | 728 | DictDir: PAnsiChar; |
| 714 | end; | 729 | end; |
| 715 | 730 | ||
| 731 | + SherpaOnnxOfflineTtsMatchaModelConfig = record | ||
| 732 | + AcousticModel: PAnsiChar; | ||
| 733 | + Vocoder: PAnsiChar; | ||
| 734 | + Lexicon: PAnsiChar; | ||
| 735 | + Tokens: PAnsiChar; | ||
| 736 | + DataDir: PAnsiChar; | ||
| 737 | + NoiseScale: cfloat; | ||
| 738 | + LengthScale: cfloat; | ||
| 739 | + DictDir: PAnsiChar; | ||
| 740 | + end; | ||
| 741 | + | ||
| 716 | SherpaOnnxOfflineTtsModelConfig = record | 742 | SherpaOnnxOfflineTtsModelConfig = record |
| 717 | Vits: SherpaOnnxOfflineTtsVitsModelConfig; | 743 | Vits: SherpaOnnxOfflineTtsVitsModelConfig; |
| 718 | NumThreads: cint32; | 744 | NumThreads: cint32; |
| 719 | Debug: cint32; | 745 | Debug: cint32; |
| 720 | Provider: PAnsiChar; | 746 | Provider: PAnsiChar; |
| 747 | + Matcha: SherpaOnnxOfflineTtsMatchaModelConfig; | ||
| 721 | end; | 748 | end; |
| 722 | 749 | ||
| 723 | SherpaOnnxOfflineTtsConfig = record | 750 | SherpaOnnxOfflineTtsConfig = record |
| @@ -1853,15 +1880,40 @@ begin | @@ -1853,15 +1880,40 @@ begin | ||
| 1853 | Dest.LengthScale := 1.0; | 1880 | Dest.LengthScale := 1.0; |
| 1854 | end; | 1881 | end; |
| 1855 | 1882 | ||
| 1883 | +function TSherpaOnnxOfflineTtsMatchaModelConfig.ToString: AnsiString; | ||
| 1884 | +begin | ||
| 1885 | + Result := Format('TSherpaOnnxOfflineTtsMatchaModelConfig(' + | ||
| 1886 | + 'AcousticModel := %s, ' + | ||
| 1887 | + 'Vocoder := %s, ' + | ||
| 1888 | + 'Lexicon := %s, ' + | ||
| 1889 | + 'Tokens := %s, ' + | ||
| 1890 | + 'DataDir := %s, ' + | ||
| 1891 | + 'NoiseScale := %.2f, ' + | ||
| 1892 | + 'LengthScale := %.2f, ' + | ||
| 1893 | + 'DictDir := %s' + | ||
| 1894 | + ')', | ||
| 1895 | + [Self.AcousticModel, Self.Vocoder, Self.Lexicon, Self.Tokens, | ||
| 1896 | + Self.DataDir, Self.NoiseScale, Self.LengthScale, Self.DictDir | ||
| 1897 | + ]); | ||
| 1898 | +end; | ||
| 1899 | + | ||
| 1900 | +class operator TSherpaOnnxOfflineTtsMatchaModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig); | ||
| 1901 | +begin | ||
| 1902 | + Dest.NoiseScale := 0.667; | ||
| 1903 | + Dest.LengthScale := 1.0; | ||
| 1904 | +end; | ||
| 1905 | + | ||
| 1856 | function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString; | 1906 | function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString; |
| 1857 | begin | 1907 | begin |
| 1858 | Result := Format('TSherpaOnnxOfflineTtsModelConfig(' + | 1908 | Result := Format('TSherpaOnnxOfflineTtsModelConfig(' + |
| 1859 | 'Vits := %s, ' + | 1909 | 'Vits := %s, ' + |
| 1860 | 'NumThreads := %d, ' + | 1910 | 'NumThreads := %d, ' + |
| 1861 | 'Debug := %s, ' + | 1911 | 'Debug := %s, ' + |
| 1862 | - 'Provider := %s' + | 1912 | + 'Provider := %s, ' + |
| 1913 | + 'Matcha := %s' + | ||
| 1863 | ')', | 1914 | ')', |
| 1864 | - [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider | 1915 | + [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider, |
| 1916 | + Self.Matcha.ToString | ||
| 1865 | ]); | 1917 | ]); |
| 1866 | end; | 1918 | end; |
| 1867 | 1919 | ||
| @@ -1905,6 +1957,15 @@ begin | @@ -1905,6 +1957,15 @@ begin | ||
| 1905 | C.Model.Vits.LengthScale := Config.Model.Vits.LengthScale; | 1957 | C.Model.Vits.LengthScale := Config.Model.Vits.LengthScale; |
| 1906 | C.Model.Vits.DictDir := PAnsiChar(Config.Model.Vits.DictDir); | 1958 | C.Model.Vits.DictDir := PAnsiChar(Config.Model.Vits.DictDir); |
| 1907 | 1959 | ||
| 1960 | + C.Model.Matcha.AcousticModel := PAnsiChar(Config.Model.Matcha.AcousticModel); | ||
| 1961 | + C.Model.Matcha.Vocoder := PAnsiChar(Config.Model.Matcha.Vocoder); | ||
| 1962 | + C.Model.Matcha.Lexicon := PAnsiChar(Config.Model.Matcha.Lexicon); | ||
| 1963 | + C.Model.Matcha.Tokens := PAnsiChar(Config.Model.Matcha.Tokens); | ||
| 1964 | + C.Model.Matcha.DataDir := PAnsiChar(Config.Model.Matcha.DataDir); | ||
| 1965 | + C.Model.Matcha.NoiseScale := Config.Model.Matcha.NoiseScale; | ||
| 1966 | + C.Model.Matcha.LengthScale := Config.Model.Matcha.LengthScale; | ||
| 1967 | + C.Model.Matcha.DictDir := PAnsiChar(Config.Model.Matcha.DictDir); | ||
| 1968 | + | ||
| 1908 | C.Model.NumThreads := Config.Model.NumThreads; | 1969 | C.Model.NumThreads := Config.Model.NumThreads; |
| 1909 | C.Model.Provider := PAnsiChar(Config.Model.Provider); | 1970 | C.Model.Provider := PAnsiChar(Config.Model.Provider); |
| 1910 | C.Model.Debug := Ord(Config.Model.Debug); | 1971 | C.Model.Debug := Ord(Config.Model.Debug); |
-
请 注册 或 登录 后发表评论