Committed by
GitHub
Add Pascal API for Kokoro TTS models (#1724)
正在显示
10 个修改的文件
包含
444 行增加
和
7 行删除
| @@ -154,6 +154,12 @@ jobs: | @@ -154,6 +154,12 @@ jobs: | ||
| 154 | ls -lh | 154 | ls -lh |
| 155 | echo "---" | 155 | echo "---" |
| 156 | 156 | ||
| 157 | + ./run-kokoro-en.sh | ||
| 158 | + rm -rf kokoro-en-* | ||
| 159 | + rm kokoro-en | ||
| 160 | + ls -lh | ||
| 161 | + echo "---" | ||
| 162 | + | ||
| 157 | ./run-matcha-zh.sh | 163 | ./run-matcha-zh.sh |
| 158 | rm -rf matcha-icefall-* | 164 | rm -rf matcha-icefall-* |
| 159 | rm matcha-zh | 165 | rm matcha-zh |
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program kokoro_en_playback; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with Kokoro models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +Note that it plays the audio back while it is still being generated. | ||
| 10 | +} | ||
| 11 | + | ||
| 12 | +{$mode objfpc} | ||
| 13 | + | ||
| 14 | +uses | ||
| 15 | + {$ifdef unix} | ||
| 16 | + cthreads, | ||
| 17 | + {$endif} | ||
| 18 | + SysUtils, | ||
| 19 | + dos, | ||
| 20 | + ctypes, | ||
| 21 | + portaudio, | ||
| 22 | + sherpa_onnx; | ||
| 23 | + | ||
| 24 | +var | ||
| 25 | + CriticalSection: TRTLCriticalSection; | ||
| 26 | + | ||
| 27 | + Tts: TSherpaOnnxOfflineTts; | ||
| 28 | + Audio: TSherpaOnnxGeneratedAudio; | ||
| 29 | + Resampler: TSherpaOnnxLinearResampler; | ||
| 30 | + | ||
| 31 | + Text: AnsiString; | ||
| 32 | + Speed: Single = 1.0; {Use a larger value to speak faster} | ||
| 33 | + SpeakerId: Integer = 7; | ||
| 34 | + Buffer: TSherpaOnnxCircularBuffer; | ||
| 35 | + FinishedGeneration: Boolean = False; | ||
| 36 | + FinishedPlaying: Boolean = False; | ||
| 37 | + | ||
| 38 | + Version: String; | ||
| 39 | + EnvStr: String; | ||
| 40 | + Status: Integer; | ||
| 41 | + NumDevices: Integer; | ||
| 42 | + DeviceIndex: Integer; | ||
| 43 | + DeviceInfo: PPaDeviceInfo; | ||
| 44 | + | ||
| 45 | + { If you get EDivByZero: Division by zero error, please change the sample rate | ||
| 46 | + to the one supported by your output device (e.g., your speaker). | ||
| 47 | + } | ||
| 48 | + DeviceSampleRate: Integer = 48000; | ||
| 49 | + I: Integer; | ||
| 50 | + Param: TPaStreamParameters; | ||
| 51 | + Stream: PPaStream; | ||
| 52 | + Wave: TSherpaOnnxWave; | ||
| 53 | + | ||
{ Callback handed to Tts.Generate.

  Samples/N describe a chunk of generated audio. The chunk is appended to
  the shared playback Buffer, going through Resampler first when one has
  been created. Arg is not used.

  Always returns 1, which asks the generator to continue (0 would stop it). }
function GenerateCallback(
  Samples: pcfloat; N: cint32;
  Arg: Pointer): cint; cdecl;
const
  ContinueGenerating = 1;
begin
  Result := ContinueGenerating;

  { Buffer is also touched by PlayCallback, hence the lock. }
  EnterCriticalSection(CriticalSection);
  try
    if Resampler = nil then
      Buffer.Push(Samples, N)
    else
      Buffer.Push(Resampler.Resample(Samples, N, False));
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
| 71 | + | ||
{ PortAudio stream callback.

  Writes frameCount float samples to output. Samples are taken from the
  shared Buffer; when the buffer holds fewer than frameCount samples the
  chunk is extended with SetLength (new dynamic-array elements are
  zero-filled by FPC, i.e. silence). input, timeInfo, statusFlags and
  userData are not used.

  Returns paContinue while samples remain or generation is still running;
  otherwise sets FinishedPlaying and returns paComplete. }
function PlayCallback(
  input: Pointer; output: Pointer;
  frameCount: culong;
  timeInfo: PPaStreamCallbackTimeInfo;
  statusFlags: TPaStreamCallbackFlags;
  userData: Pointer ): cint; cdecl;
var
  Chunk: TSherpaOnnxSamplesArray;
  Dst: pcfloat;
  K: Integer;
begin
  Dst := pcfloat(output);

  EnterCriticalSection(CriticalSection);
  try
    if Buffer.Size >= frameCount then
    begin
      { Enough data: take exactly one callback's worth. }
      Chunk := Buffer.Get(Buffer.Head, FrameCount);
      Buffer.Pop(FrameCount);
    end
    else
    begin
      { Not enough data: drain whatever is there, then pad. }
      if Buffer.Size > 0 then
      begin
        Chunk := Buffer.Get(Buffer.Head, Buffer.Size);
        Buffer.Pop(Buffer.Size);
      end;
      SetLength(Chunk, frameCount);
    end;

    for K := 0 to frameCount - 1 do
      Dst[K] := Chunk[K];

    if (Buffer.Size <= 0) and FinishedGeneration then
    begin
      FinishedPlaying := True;
      Result := paComplete;
    end
    else
      Result := paContinue;
  finally
    LeaveCriticalSection(CriticalSection);
  end;
end;
| 112 | + | ||
{ Builds the TTS configuration for the Kokoro English model located in
  ./kokoro-en-v0_19 and returns a newly created TSherpaOnnxOfflineTts.
  The caller owns the returned object and must free it. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  Config.MaxNumSentences := 1;

  with Config.Model do
  begin
    NumThreads := 2;
    Debug := False;
  end;

  with Config.Model.Kokoro do
  begin
    Model := './kokoro-en-v0_19/model.onnx';
    Voices := './kokoro-en-v0_19/voices.bin';
    Tokens := './kokoro-en-v0_19/tokens.txt';
    DataDir := './kokoro-en-v0_19/espeak-ng-data';
  end;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
| 127 | + | ||
{ Main program.

  1. Creates the TTS engine (and a resampler when the model's sample rate
     differs from DeviceSampleRate).
  2. Initializes PortAudio and selects the output device: the default one,
     or the index given in the SHERPA_ONNX_MIC_DEVICE environment variable.
  3. Opens and starts an output stream driven by PlayCallback.
  4. Generates speech; GenerateCallback feeds the shared Buffer while the
     audio is still being produced.
  5. Saves the complete audio to a wave file, waits for playback to end and
     releases every resource. }
begin
  Tts := GetOfflineTts;
  if Tts.GetSampleRate <> DeviceSampleRate then
    Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate);

  Version := String(Pa_GetVersionText);
  WriteLn('Version is ', Version);
  Status := Pa_Initialize;
  if Status <> paNoError then
  begin
    WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;

  NumDevices := Pa_GetDeviceCount;
  WriteLn('Num devices: ', NumDevices);

  DeviceIndex := Pa_GetDefaultOutputDevice;

  if DeviceIndex = paNoDevice then
  begin
    WriteLn('No default output device found');
    Pa_Terminate;
    Exit;
  end;

  EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE');
  if EnvStr <> '' then
  begin
    DeviceIndex := StrToIntDef(EnvStr, DeviceIndex);
    WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr);
  end;

  { The index may come from the environment. Pa_GetDeviceInfo returns nil for
    an out-of-range index, which would be dereferenced below, so reject it. }
  if (DeviceIndex < 0) or (DeviceIndex >= NumDevices) then
  begin
    WriteLn('Invalid output device index: ', DeviceIndex);
    Pa_Terminate;
    Exit;
  end;

  for I := 0 to (NumDevices - 1) do
  begin
    DeviceInfo := Pa_GetDeviceInfo(I);
    if I = DeviceIndex then
      WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)]))
    else
      WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)]));
  end;

  DeviceInfo := Pa_GetDeviceInfo(DeviceIndex);
  WriteLn('Use device ', DeviceIndex);
  WriteLn(' Name ', DeviceInfo^.Name);
  WriteLn(' Max output channels ', DeviceInfo^.MaxOutputChannels);

  Initialize(Param);
  Param.Device := DeviceIndex;
  Param.ChannelCount := 1;
  Param.SampleFormat := paFloat32;
  Param.SuggestedLatency := DeviceInfo^.DefaultHighOutputLatency;
  Param.HostApiSpecificStreamInfo := nil;

  { Room for 30 seconds of audio at the device sample rate. }
  Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate);

  { Create the lock before any callback can possibly run. }
  InitCriticalSection(CriticalSection);

  { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. }
  Status := Pa_OpenStream(Stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag,
    PPaStreamCallback(@PlayCallback), nil);

  if Status <> paNoError then
  begin
    WriteLn('Failed to open stream, ', Pa_GetErrorText(Status));
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  Status := Pa_StartStream(Stream);
  if Status <> paNoError then
  begin
    WriteLn('Failed to start stream, ', Pa_GetErrorText(Status));
    Pa_CloseStream(Stream);
    DoneCriticalSection(CriticalSection);
    Pa_Terminate;
    Exit;
  end;

  WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

  Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

  Audio := Tts.Generate(Text, SpeakerId, Speed,
    PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil);
  FinishedGeneration := True;
  SherpaOnnxWriteWave('./kokoro-en-playback-7.wav', Audio.Samples, Audio.SampleRate);
  WriteLn('Saved to ./kokoro-en-playback-7.wav');

  while not FinishedPlaying do
    Pa_Sleep(100); {sleep for 0.1 second }
  {TODO(fangjun): Use an event to indicate that playback has finished}

  { Close the stream first: PlayCallback sets FinishedPlaying while it still
    holds the lock, so the lock must outlive the stream. }
  Status := Pa_CloseStream(Stream);

  DoneCriticalSection(CriticalSection);

  FreeAndNil(Tts);
  FreeAndNil(Resampler);
  FreeAndNil(Buffer);

  if Status <> paNoError then
  begin
    WriteLn('Failed to close stream, ', Pa_GetErrorText(Status));
    Pa_Terminate;
    Exit;
  end;

  Status := Pa_Terminate;
  if Status <> paNoError then
  begin
    WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status));
    Exit;
  end;
end.
| 239 | + |
pascal-api-examples/tts/kokoro-en.pas
0 → 100644
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program kokoro_en; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with Kokoro TTS models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +If you want to play it while it is generating, please see | ||
| 10 | +./kokoro-en-playback.pas | ||
| 11 | +} | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + SysUtils, | ||
| 17 | + sherpa_onnx; | ||
| 18 | + | ||
{ Creates the offline TTS engine for the Kokoro English model stored in
  ./kokoro-en-v0_19. The returned object belongs to the caller. }
function GetOfflineTts: TSherpaOnnxOfflineTts;
var
  Config: TSherpaOnnxOfflineTtsConfig;
begin
  { Model files }
  with Config.Model.Kokoro do
  begin
    DataDir := './kokoro-en-v0_19/espeak-ng-data';
    Tokens := './kokoro-en-v0_19/tokens.txt';
    Voices := './kokoro-en-v0_19/voices.bin';
    Model := './kokoro-en-v0_19/model.onnx';
  end;

  { Runtime options }
  with Config.Model do
  begin
    Debug := False;
    NumThreads := 2;
  end;
  Config.MaxNumSentences := 1;

  Result := TSherpaOnnxOfflineTts.Create(Config);
end;
| 33 | + | ||
var
  Tts: TSherpaOnnxOfflineTts;
  Audio: TSherpaOnnxGeneratedAudio;

  Text: AnsiString;
  Speed: Single = 1.0;  {Use a larger value to speak faster}
  SpeakerId: Integer = 8;

  { Output file; derived from SpeakerId so the name always matches the voice. }
  Filename: AnsiString;

{ Main program: generates speech for Text with the selected speaker and
  writes it to ./kokoro-en-<SpeakerId>.wav. }
begin
  Tts := GetOfflineTts;
  try
    WriteLn('There are ', Tts.GetNumSpeakers, ' speakers');

    Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.';

    Audio := Tts.Generate(Text, SpeakerId, Speed);

    Filename := Format('./kokoro-en-%d.wav', [SpeakerId]);
    SherpaOnnxWriteWave(Filename, Audio.Samples, Audio.SampleRate);
    WriteLn('Saved to ', Filename);
  finally
    { Release the engine even if generation or writing raises. }
    FreeAndNil(Tts);
  end;
end.
| 55 | + |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | program matcha_en_playback; | 2 | program matcha_en_playback; |
| 3 | { | 3 | { |
| 4 | This file shows how to use the text to speech API of sherpa-onnx | 4 | This file shows how to use the text to speech API of sherpa-onnx |
| 5 | -with Piper models. | 5 | +with MatchaTTS models. |
| 6 | 6 | ||
| 7 | It generates speech from text and saves it to a wave file. | 7 | It generates speech from text and saves it to a wave file. |
| 8 | 8 | ||
| @@ -210,8 +210,8 @@ begin | @@ -210,8 +210,8 @@ begin | ||
| 210 | Audio := Tts.Generate(Text, SpeakerId, Speed, | 210 | Audio := Tts.Generate(Text, SpeakerId, Speed, |
| 211 | PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil); | 211 | PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil); |
| 212 | FinishedGeneration := True; | 212 | FinishedGeneration := True; |
| 213 | - SherpaOnnxWriteWave('./matcha-zh-playback.wav', Audio.Samples, Audio.SampleRate); | ||
| 214 | - WriteLn('Saved to ./matcha-zh-playback.wav'); | 213 | + SherpaOnnxWriteWave('./matcha-en-playback.wav', Audio.Samples, Audio.SampleRate); |
| 214 | + WriteLn('Saved to ./matcha-en-playback.wav'); | ||
| 215 | 215 | ||
| 216 | while not FinishedPlaying do | 216 | while not FinishedPlaying do |
| 217 | Pa_Sleep(100); {sleep for 0.1 second } | 217 | Pa_Sleep(100); {sleep for 0.1 second } |
| @@ -7,7 +7,7 @@ with MatchaTTS models. | @@ -7,7 +7,7 @@ with MatchaTTS models. | ||
| 7 | It generates speech from text and saves it to a wave file. | 7 | It generates speech from text and saves it to a wave file. |
| 8 | 8 | ||
| 9 | If you want to play it while it is generating, please see | 9 | If you want to play it while it is generating, please see |
| 10 | -./matcha-zh-playback.pas | 10 | +./matcha-en-playback.pas |
| 11 | } | 11 | } |
| 12 | 12 | ||
| 13 | {$mode objfpc} | 13 | {$mode objfpc} |
| @@ -2,7 +2,7 @@ | @@ -2,7 +2,7 @@ | ||
| 2 | program matcha_zh_playback; | 2 | program matcha_zh_playback; |
| 3 | { | 3 | { |
| 4 | This file shows how to use the text to speech API of sherpa-onnx | 4 | This file shows how to use the text to speech API of sherpa-onnx |
| 5 | -with Piper models. | 5 | +with MatchaTTS models. |
| 6 | 6 | ||
| 7 | It generates speech from text and saves it to a wave file. | 7 | It generates speech from text and saves it to a wave file. |
| 8 | 8 |
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared library (if it is not installed yet),
# downloads the Kokoro English model (if missing), compiles
# kokoro-en-playback.pas with Free Pascal and runs it.
#
# The script may be started from any directory; it always works inside
# the directory that contains it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Relative paths below (model directory, .pas file, binary) are resolved
# against the script's own directory rather than the caller's.
cd "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# For pre-trained Kokoro models, please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

# Please see ../portaudio-test/README.md
# for how to install portaudio on macOS.
# NOTE: the portaudio path below is specific to one Homebrew installation;
# adjust it to where portaudio lives on your machine.
fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  -Fl/usr/local/Cellar/portaudio/19.7.0/lib \
  ./kokoro-en-playback.pas

# ${VAR:+:$VAR} avoids a trailing ':' (which would add the current
# directory to the search path) when the variable is unset or empty.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./kokoro-en-playback
pascal-api-examples/tts/run-kokoro-en.sh
0 → 100755
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared library (if it is not installed yet),
# downloads the Kokoro English model (if missing), compiles kokoro-en.pas
# with Free Pascal and runs it.
#
# The script may be started from any directory; it always works inside
# the directory that contains it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Relative paths below (model directory, .pas file, binary) are resolved
# against the script's own directory rather than the caller's.
cd "$SCRIPT_DIR"

if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  popd
fi

# For pre-trained Kokoro models, please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
  tar xf kokoro-en-v0_19.tar.bz2
  rm kokoro-en-v0_19.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./kokoro-en.pas

# ${VAR:+:$VAR} avoids a trailing ':' (which would add the current
# directory to the search path) when the variable is unset or empty.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./kokoro-en
| @@ -76,12 +76,24 @@ type | @@ -76,12 +76,24 @@ type | ||
| 76 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig); | 76 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsMatchaModelConfig); |
| 77 | end; | 77 | end; |
| 78 | 78 | ||
| 79 | + TSherpaOnnxOfflineTtsKokoroModelConfig = record { Pascal-side settings for a Kokoro TTS model } | ||
| 80 | + Model: AnsiString; { Path to the model file; the examples use model.onnx } | ||
| 81 | + Voices: AnsiString; { Path to the voices file; the examples use voices.bin } | ||
| 82 | + Tokens: AnsiString; { Path to the tokens file; the examples use tokens.txt } | ||
| 83 | + DataDir: AnsiString; { Data directory; the examples point it at espeak-ng-data } | ||
| 84 | + LengthScale: Single; { Set to 1.0 by Initialize; presumably scales speech duration - TODO confirm } | ||
| 85 | + | ||
| 86 | + function ToString: AnsiString; { Formats all fields into one readable string } | ||
| 87 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKokoroModelConfig); { Applies the default LengthScale } | ||
| 88 | + end; | ||
| 89 | + | ||
| 79 | TSherpaOnnxOfflineTtsModelConfig = record | 90 | TSherpaOnnxOfflineTtsModelConfig = record |
| 80 | Vits: TSherpaOnnxOfflineTtsVitsModelConfig; | 91 | Vits: TSherpaOnnxOfflineTtsVitsModelConfig; |
| 81 | NumThreads: Integer; | 92 | NumThreads: Integer; |
| 82 | Debug: Boolean; | 93 | Debug: Boolean; |
| 83 | Provider: AnsiString; | 94 | Provider: AnsiString; |
| 84 | Matcha: TSherpaOnnxOfflineTtsMatchaModelConfig; | 95 | Matcha: TSherpaOnnxOfflineTtsMatchaModelConfig; |
| 96 | + Kokoro: TSherpaOnnxOfflineTtsKokoroModelConfig; | ||
| 85 | 97 | ||
| 86 | function ToString: AnsiString; | 98 | function ToString: AnsiString; |
| 87 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); | 99 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); |
| @@ -739,12 +751,21 @@ type | @@ -739,12 +751,21 @@ type | ||
| 739 | DictDir: PAnsiChar; | 751 | DictDir: PAnsiChar; |
| 740 | end; | 752 | end; |
| 741 | 753 | ||
| 754 | + SherpaOnnxOfflineTtsKokoroModelConfig = record { Counterpart of TSherpaOnnxOfflineTtsKokoroModelConfig using PAnsiChar/cfloat fields } | ||
| 755 | + Model: PAnsiChar; { Filled from the Pascal config's Model via a PAnsiChar cast } | ||
| 756 | + Voices: PAnsiChar; { Filled from the Pascal config's Voices via a PAnsiChar cast } | ||
| 757 | + Tokens: PAnsiChar; { Filled from the Pascal config's Tokens via a PAnsiChar cast } | ||
| 758 | + DataDir: PAnsiChar; { Filled from the Pascal config's DataDir via a PAnsiChar cast } | ||
| 759 | + LengthScale: cfloat; { Copied from the Pascal config's LengthScale } | ||
| 760 | + end; { NOTE(review): field order presumably has to match the C API struct - confirm before reordering } | ||
| 761 | + | ||
| 742 | SherpaOnnxOfflineTtsModelConfig = record | 762 | SherpaOnnxOfflineTtsModelConfig = record |
| 743 | Vits: SherpaOnnxOfflineTtsVitsModelConfig; | 763 | Vits: SherpaOnnxOfflineTtsVitsModelConfig; |
| 744 | NumThreads: cint32; | 764 | NumThreads: cint32; |
| 745 | Debug: cint32; | 765 | Debug: cint32; |
| 746 | Provider: PAnsiChar; | 766 | Provider: PAnsiChar; |
| 747 | Matcha: SherpaOnnxOfflineTtsMatchaModelConfig; | 767 | Matcha: SherpaOnnxOfflineTtsMatchaModelConfig; |
| 768 | + Kokoro: SherpaOnnxOfflineTtsKokoroModelConfig; | ||
| 748 | end; | 769 | end; |
| 749 | 770 | ||
| 750 | SherpaOnnxOfflineTtsConfig = record | 771 | SherpaOnnxOfflineTtsConfig = record |
| @@ -1903,6 +1924,23 @@ begin | @@ -1903,6 +1924,23 @@ begin | ||
| 1903 | Dest.LengthScale := 1.0; | 1924 | Dest.LengthScale := 1.0; |
| 1904 | end; | 1925 | end; |
| 1905 | 1926 | ||
{ Returns a human-readable dump of every field of the record, in the form
  TSherpaOnnxOfflineTtsKokoroModelConfig(Model := ..., ..., LengthScale := x.xx). }
function TSherpaOnnxOfflineTtsKokoroModelConfig.ToString: AnsiString;
const
  Layout =
    'TSherpaOnnxOfflineTtsKokoroModelConfig(' +
    'Model := %s, ' +
    'Voices := %s, ' +
    'Tokens := %s, ' +
    'DataDir := %s, ' +
    'LengthScale := %.2f' +
    ')';
begin
  Result := Format(Layout, [
    Self.Model,
    Self.Voices,
    Self.Tokens,
    Self.DataDir,
    Self.LengthScale
  ]);
end;
| 1938 | + | ||
{ Management operator run when a record of this type is initialized:
  gives LengthScale its default value of 1.0. Other fields are not touched here. }
class operator TSherpaOnnxOfflineTtsKokoroModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKokoroModelConfig);
const
  DefaultLengthScale = 1.0;
begin
  Dest.LengthScale := DefaultLengthScale;
end;
| 1943 | + | ||
| 1906 | function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString; | 1944 | function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString; |
| 1907 | begin | 1945 | begin |
| 1908 | Result := Format('TSherpaOnnxOfflineTtsModelConfig(' + | 1946 | Result := Format('TSherpaOnnxOfflineTtsModelConfig(' + |
| @@ -1910,10 +1948,11 @@ begin | @@ -1910,10 +1948,11 @@ begin | ||
| 1910 | 'NumThreads := %d, ' + | 1948 | 'NumThreads := %d, ' + |
| 1911 | 'Debug := %s, ' + | 1949 | 'Debug := %s, ' + |
| 1912 | 'Provider := %s, ' + | 1950 | 'Provider := %s, ' + |
| 1913 | - 'Matcha := %s' + | 1951 | + 'Matcha := %s, ' + |
| 1952 | + 'Kokoro := %s' + | ||
| 1914 | ')', | 1953 | ')', |
| 1915 | [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider, | 1954 | [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider, |
| 1916 | - Self.Matcha.ToString | 1955 | + Self.Matcha.ToString, Self.Kokoro.ToString |
| 1917 | ]); | 1956 | ]); |
| 1918 | end; | 1957 | end; |
| 1919 | 1958 | ||
| @@ -1966,6 +2005,12 @@ begin | @@ -1966,6 +2005,12 @@ begin | ||
| 1966 | C.Model.Matcha.LengthScale := Config.Model.Matcha.LengthScale; | 2005 | C.Model.Matcha.LengthScale := Config.Model.Matcha.LengthScale; |
| 1967 | C.Model.Matcha.DictDir := PAnsiChar(Config.Model.Matcha.DictDir); | 2006 | C.Model.Matcha.DictDir := PAnsiChar(Config.Model.Matcha.DictDir); |
| 1968 | 2007 | ||
| 2008 | + C.Model.Kokoro.Model := PAnsiChar(Config.Model.Kokoro.Model); | ||
| 2009 | + C.Model.Kokoro.Voices := PAnsiChar(Config.Model.Kokoro.Voices); | ||
| 2010 | + C.Model.Kokoro.Tokens := PAnsiChar(Config.Model.Kokoro.Tokens); | ||
| 2011 | + C.Model.Kokoro.DataDir := PAnsiChar(Config.Model.Kokoro.DataDir); | ||
| 2012 | + C.Model.Kokoro.LengthScale := Config.Model.Kokoro.LengthScale; | ||
| 2013 | + | ||
| 1969 | C.Model.NumThreads := Config.Model.NumThreads; | 2014 | C.Model.NumThreads := Config.Model.NumThreads; |
| 1970 | C.Model.Provider := PAnsiChar(Config.Model.Provider); | 2015 | C.Model.Provider := PAnsiChar(Config.Model.Provider); |
| 1971 | C.Model.Debug := Ord(Config.Model.Debug); | 2016 | C.Model.Debug := Ord(Config.Model.Debug); |
-
请 注册 或 登录 后发表评论