正在显示
6 个修改的文件
包含
434 行增加
和
3 行删除
| @@ -7,6 +7,8 @@ matcha-en | @@ -7,6 +7,8 @@ matcha-en | ||
| 7 | matcha-zh-playback | 7 | matcha-zh-playback |
| 8 | matcha-en-playback | 8 | matcha-en-playback |
| 9 | kokoro-en | 9 | kokoro-en |
| 10 | +kitten-en | ||
| 10 | kokoro-en-playback | 11 | kokoro-en-playback |
| 12 | +kitten-en-playback | ||
| 11 | kokoro-zh-en | 13 | kokoro-zh-en |
| 12 | kokoro-zh-en-playback | 14 | kokoro-zh-en-playback |
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program kitten_en_playback; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with kitten models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +Note that it plays the audio back as it is still generating. | ||
| 10 | +} | ||
| 11 | + | ||
| 12 | +{$mode objfpc} | ||
| 13 | + | ||
| 14 | +uses | ||
| 15 | + {$ifdef unix} | ||
| 16 | + cthreads, | ||
| 17 | + {$endif} | ||
| 18 | + SysUtils, | ||
| 19 | + dos, | ||
| 20 | + ctypes, | ||
| 21 | + portaudio, | ||
| 22 | + sherpa_onnx; | ||
| 23 | + | ||
| 24 | +var | ||
| 25 | + CriticalSection: TRTLCriticalSection; | ||
| 26 | + | ||
| 27 | + Tts: TSherpaOnnxOfflineTts; | ||
| 28 | + Audio: TSherpaOnnxGeneratedAudio; | ||
| 29 | + Resampler: TSherpaOnnxLinearResampler; | ||
| 30 | + | ||
| 31 | + Text: AnsiString; | ||
| 32 | + Speed: Single = 1.0; {Use a larger value to speak faster} | ||
| 33 | + SpeakerId: Integer = 0; | ||
| 34 | + Buffer: TSherpaOnnxCircularBuffer; | ||
| 35 | + FinishedGeneration: Boolean = False; | ||
| 36 | + FinishedPlaying: Boolean = False; | ||
| 37 | + | ||
| 38 | + Version: String; | ||
| 39 | + EnvStr: String; | ||
| 40 | + Status: Integer; | ||
| 41 | + NumDevices: Integer; | ||
| 42 | + DeviceIndex: Integer; | ||
| 43 | + DeviceInfo: PPaDeviceInfo; | ||
| 44 | + | ||
| 45 | + { If you get EDivByZero: Division by zero error, please change the sample rate | ||
| 46 | + to the one supported by your microphone. | ||
| 47 | + } | ||
| 48 | + DeviceSampleRate: Integer = 48000; | ||
| 49 | + I: Integer; | ||
| 50 | + Param: TPaStreamParameters; | ||
| 51 | + Stream: PPaStream; | ||
| 52 | + Wave: TSherpaOnnxWave; | ||
| 53 | + | ||
| 54 | +function GenerateCallback( | ||
| 55 | + Samples: pcfloat; N: cint32; | ||
| 56 | + Arg: Pointer): cint; cdecl; | ||
| 57 | +begin | ||
| 58 | + EnterCriticalSection(CriticalSection); | ||
| 59 | + try | ||
| 60 | + if Resampler <> nil then | ||
| 61 | + Buffer.Push(Resampler.Resample(Samples, N, False)) | ||
| 62 | + else | ||
| 63 | + Buffer.Push(Samples, N); | ||
| 64 | + finally | ||
| 65 | + LeaveCriticalSection(CriticalSection); | ||
| 66 | + end; | ||
| 67 | + | ||
| 68 | + { 1 means to continue generating; 0 means to stop generating. } | ||
| 69 | + Result := 1; | ||
| 70 | +end; | ||
| 71 | + | ||
| 72 | +function PlayCallback( | ||
| 73 | + input: Pointer; output: Pointer; | ||
| 74 | + frameCount: culong; | ||
| 75 | + timeInfo: PPaStreamCallbackTimeInfo; | ||
| 76 | + statusFlags: TPaStreamCallbackFlags; | ||
| 77 | + userData: Pointer ): cint; cdecl; | ||
| 78 | +var | ||
| 79 | + Samples: TSherpaOnnxSamplesArray; | ||
| 80 | + I: Integer; | ||
| 81 | +begin | ||
| 82 | + EnterCriticalSection(CriticalSection); | ||
| 83 | + try | ||
| 84 | + if Buffer.Size >= frameCount then | ||
| 85 | + begin | ||
| 86 | + Samples := Buffer.Get(Buffer.Head, FrameCount); | ||
| 87 | + Buffer.Pop(FrameCount); | ||
| 88 | + end | ||
| 89 | + else if Buffer.Size > 0 then | ||
| 90 | + begin | ||
| 91 | + Samples := Buffer.Get(Buffer.Head, Buffer.Size); | ||
| 92 | + Buffer.Pop(Buffer.Size); | ||
| 93 | + SetLength(Samples, frameCount); | ||
| 94 | + end | ||
| 95 | + else | ||
| 96 | + SetLength(Samples, frameCount); | ||
| 97 | + | ||
| 98 | + for I := 0 to frameCount - 1 do | ||
| 99 | + pcfloat(output)[I] := Samples[I]; | ||
| 100 | + | ||
| 101 | + if (Buffer.Size > 0) or (not FinishedGeneration) then | ||
| 102 | + Result := paContinue | ||
| 103 | + else | ||
| 104 | + begin | ||
| 105 | + Result := paComplete; | ||
| 106 | + FinishedPlaying := True; | ||
| 107 | + end; | ||
| 108 | + finally | ||
| 109 | + LeaveCriticalSection(CriticalSection); | ||
| 110 | + end; | ||
| 111 | +end; | ||
| 112 | + | ||
| 113 | +function GetOfflineTts: TSherpaOnnxOfflineTts; | ||
| 114 | +var | ||
| 115 | + Config: TSherpaOnnxOfflineTtsConfig; | ||
| 116 | +begin | ||
| 117 | + Config.Model.Kitten.Model := './kitten-nano-en-v0_1-fp16/model.fp16.onnx'; | ||
| 118 | + Config.Model.Kitten.Voices := './kitten-nano-en-v0_1-fp16/voices.bin'; | ||
| 119 | + Config.Model.Kitten.Tokens := './kitten-nano-en-v0_1-fp16/tokens.txt'; | ||
| 120 | + Config.Model.Kitten.DataDir := './kitten-nano-en-v0_1-fp16/espeak-ng-data'; | ||
| 121 | + Config.Model.NumThreads := 2; | ||
| 122 | + Config.Model.Debug := False; | ||
| 123 | + Config.MaxNumSentences := 1; | ||
| 124 | + | ||
| 125 | + Result := TSherpaOnnxOfflineTts.Create(Config); | ||
| 126 | +end; | ||
| 127 | + | ||
| 128 | +begin | ||
| 129 | + Tts := GetOfflineTts; | ||
| 130 | + if Tts.GetSampleRate <> DeviceSampleRate then | ||
| 131 | + Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate); | ||
| 132 | + | ||
| 133 | + Version := String(Pa_GetVersionText); | ||
| 134 | + WriteLn('Version is ', Version); | ||
| 135 | + Status := Pa_Initialize; | ||
| 136 | + if Status <> paNoError then | ||
| 137 | + begin | ||
| 138 | + WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status)); | ||
| 139 | + Exit; | ||
| 140 | + end; | ||
| 141 | + | ||
| 142 | + NumDevices := Pa_GetDeviceCount; | ||
| 143 | + WriteLn('Num devices: ', NumDevices); | ||
| 144 | + | ||
| 145 | + DeviceIndex := Pa_GetDefaultOutputDevice; | ||
| 146 | + | ||
| 147 | + if DeviceIndex = paNoDevice then | ||
| 148 | + begin | ||
| 149 | + WriteLn('No default output device found'); | ||
| 150 | + Pa_Terminate; | ||
| 151 | + Exit; | ||
| 152 | + end; | ||
| 153 | + | ||
| 154 | + EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE'); | ||
| 155 | + if EnvStr <> '' then | ||
| 156 | + begin | ||
| 157 | + DeviceIndex := StrToIntDef(EnvStr, DeviceIndex); | ||
| 158 | + WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr); | ||
| 159 | + end; | ||
| 160 | + | ||
| 161 | + for I := 0 to (NumDevices - 1) do | ||
| 162 | + begin | ||
| 163 | + DeviceInfo := Pa_GetDeviceInfo(I); | ||
| 164 | + if I = DeviceIndex then | ||
| 165 | + { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) } | ||
| 166 | + WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)])) | ||
| 167 | + else | ||
| 168 | + WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)])); | ||
| 169 | + end; | ||
| 170 | + | ||
| 171 | + WriteLn('Use device ', DeviceIndex); | ||
| 172 | + WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name); | ||
| 173 | + WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels); | ||
| 174 | + | ||
| 175 | + Initialize(Param); | ||
| 176 | + Param.Device := DeviceIndex; | ||
| 177 | + Param.ChannelCount := 1; | ||
| 178 | + Param.SampleFormat := paFloat32; | ||
| 179 | + param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency; | ||
| 180 | + param.HostApiSpecificStreamInfo := nil; | ||
| 181 | + | ||
| 182 | + Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate); | ||
| 183 | + | ||
| 184 | + | ||
| 185 | + { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. } | ||
| 186 | + Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag, | ||
| 187 | + PPaStreamCallback(@PlayCallback), nil); | ||
| 188 | + | ||
| 189 | + if Status <> paNoError then | ||
| 190 | + begin | ||
| 191 | + WriteLn('Failed to open stream, ', Pa_GetErrorText(Status)); | ||
| 192 | + Pa_Terminate; | ||
| 193 | + Exit; | ||
| 194 | + end; | ||
| 195 | + | ||
| 196 | + InitCriticalSection(CriticalSection); | ||
| 197 | + | ||
| 198 | + Status := Pa_StartStream(stream); | ||
| 199 | + if Status <> paNoError then | ||
| 200 | + begin | ||
| 201 | + WriteLn('Failed to start stream, ', Pa_GetErrorText(Status)); | ||
| 202 | + Pa_Terminate; | ||
| 203 | + Exit; | ||
| 204 | + end; | ||
| 205 | + | ||
| 206 | + WriteLn('There are ', Tts.GetNumSpeakers, ' speakers'); | ||
| 207 | + | ||
| 208 | + Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.'; | ||
| 209 | + | ||
| 210 | + Audio := Tts.Generate(Text, SpeakerId, Speed, | ||
| 211 | + PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil); | ||
| 212 | + FinishedGeneration := True; | ||
| 213 | + SherpaOnnxWriteWave('./kitten-en-playback-0.wav', Audio.Samples, Audio.SampleRate); | ||
| 214 | + WriteLn('Saved to ./kitten-en-playback-0.wav'); | ||
| 215 | + | ||
| 216 | + while not FinishedPlaying do | ||
| 217 | + Pa_Sleep(100); {sleep for 0.1 second } | ||
| 218 | + {TODO(fangjun): Use an event to indicate the play is finished} | ||
| 219 | + | ||
| 220 | + DoneCriticalSection(CriticalSection); | ||
| 221 | + | ||
| 222 | + FreeAndNil(Tts); | ||
| 223 | + FreeAndNil(Resampler); | ||
| 224 | + | ||
| 225 | + Status := Pa_CloseStream(stream); | ||
| 226 | + if Status <> paNoError then | ||
| 227 | + begin | ||
| 228 | + WriteLn('Failed to close stream, ', Pa_GetErrorText(Status)); | ||
| 229 | + Exit; | ||
| 230 | + end; | ||
| 231 | + | ||
| 232 | + Status := Pa_Terminate; | ||
| 233 | + if Status <> paNoError then | ||
| 234 | + begin | ||
| 235 | + WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status)); | ||
| 236 | + Exit; | ||
| 237 | + end; | ||
| 238 | +end. | ||
| 239 | + |
pascal-api-examples/tts/kitten-en.pas
0 → 100644
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | +program kitten_en; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with Kitten TTS models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +If you want to play it while it is generating, please see | ||
| 10 | +./kitten-en-playback.pas | ||
| 11 | +} | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + SysUtils, | ||
| 17 | + sherpa_onnx; | ||
| 18 | + | ||
| 19 | +function GetOfflineTts: TSherpaOnnxOfflineTts; | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxOfflineTtsConfig; | ||
| 22 | +begin | ||
| 23 | + Config.Model.Kitten.Model := './kitten-nano-en-v0_1-fp16/model.fp16.onnx'; | ||
| 24 | + Config.Model.Kitten.Voices := './kitten-nano-en-v0_1-fp16/voices.bin'; | ||
| 25 | + Config.Model.Kitten.Tokens := './kitten-nano-en-v0_1-fp16/tokens.txt'; | ||
| 26 | + Config.Model.Kitten.DataDir := './kitten-nano-en-v0_1-fp16/espeak-ng-data'; | ||
| 27 | + Config.Model.NumThreads := 2; | ||
| 28 | + Config.Model.Debug := False; | ||
| 29 | + Config.MaxNumSentences := 1; | ||
| 30 | + | ||
| 31 | + Result := TSherpaOnnxOfflineTts.Create(Config); | ||
| 32 | +end; | ||
| 33 | + | ||
| 34 | +var | ||
| 35 | + Tts: TSherpaOnnxOfflineTts; | ||
| 36 | + Audio: TSherpaOnnxGeneratedAudio; | ||
| 37 | + | ||
| 38 | + Text: AnsiString; | ||
| 39 | + Speed: Single = 1.0; {Use a larger value to speak faster} | ||
| 40 | + SpeakerId: Integer = 0; | ||
| 41 | + | ||
| 42 | +begin | ||
| 43 | + Tts := GetOfflineTts; | ||
| 44 | + | ||
| 45 | + WriteLn('There are ', Tts.GetNumSpeakers, ' speakers'); | ||
| 46 | + | ||
| 47 | + Text := 'Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone.'; | ||
| 48 | + | ||
| 49 | + Audio := Tts.Generate(Text, SpeakerId, Speed); | ||
| 50 | + SherpaOnnxWriteWave('./kitten-en-0.wav', Audio.Samples, Audio.SampleRate); | ||
| 51 | + WriteLn('Saved to ./kitten-en-0.wav'); | ||
| 52 | + | ||
| 53 | + FreeAndNil(Tts); | ||
| 54 | +end. | ||
| 55 | + |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + popd | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +# please visit | ||
| 27 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html | ||
| 28 | +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 30 | + tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 31 | + rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -dSHERPA_ONNX_USE_SHARED_LIBS \ | ||
| 36 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 37 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 38 | + -Fl/usr/local/Cellar/portaudio/19.7.0/lib \ | ||
| 39 | + ./kitten-en-playback.pas | ||
| 40 | + | ||
| 41 | +# Please see ../portaudio-test/README.md | ||
| 42 | +# for how to install portaudio on macOS | ||
| 43 | + | ||
| 44 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 45 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 46 | + | ||
| 47 | +./kitten-en-playback |
pascal-api-examples/tts/run-kitten-en.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + popd | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +# please visit | ||
| 27 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kitten.html | ||
| 28 | +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 30 | + tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 31 | + rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -dSHERPA_ONNX_USE_SHARED_LIBS \ | ||
| 36 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 37 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 38 | + ./kitten-en.pas | ||
| 39 | + | ||
| 40 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 41 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 42 | + | ||
| 43 | +./kitten-en |
| @@ -90,6 +90,17 @@ type | @@ -90,6 +90,17 @@ type | ||
| 90 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKokoroModelConfig); | 90 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKokoroModelConfig); |
| 91 | end; | 91 | end; |
| 92 | 92 | ||
| 93 | + TSherpaOnnxOfflineTtsKittenModelConfig = record | ||
| 94 | + Model: AnsiString; | ||
| 95 | + Voices: AnsiString; | ||
| 96 | + Tokens: AnsiString; | ||
| 97 | + DataDir: AnsiString; | ||
| 98 | + LengthScale: Single; | ||
| 99 | + | ||
| 100 | + function ToString: AnsiString; | ||
| 101 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKittenModelConfig); | ||
| 102 | + end; | ||
| 103 | + | ||
| 93 | TSherpaOnnxOfflineTtsModelConfig = record | 104 | TSherpaOnnxOfflineTtsModelConfig = record |
| 94 | Vits: TSherpaOnnxOfflineTtsVitsModelConfig; | 105 | Vits: TSherpaOnnxOfflineTtsVitsModelConfig; |
| 95 | NumThreads: Integer; | 106 | NumThreads: Integer; |
| @@ -97,6 +108,7 @@ type | @@ -97,6 +108,7 @@ type | ||
| 97 | Provider: AnsiString; | 108 | Provider: AnsiString; |
| 98 | Matcha: TSherpaOnnxOfflineTtsMatchaModelConfig; | 109 | Matcha: TSherpaOnnxOfflineTtsMatchaModelConfig; |
| 99 | Kokoro: TSherpaOnnxOfflineTtsKokoroModelConfig; | 110 | Kokoro: TSherpaOnnxOfflineTtsKokoroModelConfig; |
| 111 | + Kitten: TSherpaOnnxOfflineTtsKittenModelConfig; | ||
| 100 | 112 | ||
| 101 | function ToString: AnsiString; | 113 | function ToString: AnsiString; |
| 102 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); | 114 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); |
| @@ -913,6 +925,14 @@ type | @@ -913,6 +925,14 @@ type | ||
| 913 | Lang: PAnsiChar; | 925 | Lang: PAnsiChar; |
| 914 | end; | 926 | end; |
| 915 | 927 | ||
| 928 | + SherpaOnnxOfflineTtsKittenModelConfig = record | ||
| 929 | + Model: PAnsiChar; | ||
| 930 | + Voices: PAnsiChar; | ||
| 931 | + Tokens: PAnsiChar; | ||
| 932 | + DataDir: PAnsiChar; | ||
| 933 | + LengthScale: cfloat; | ||
| 934 | + end; | ||
| 935 | + | ||
| 916 | SherpaOnnxOfflineTtsModelConfig = record | 936 | SherpaOnnxOfflineTtsModelConfig = record |
| 917 | Vits: SherpaOnnxOfflineTtsVitsModelConfig; | 937 | Vits: SherpaOnnxOfflineTtsVitsModelConfig; |
| 918 | NumThreads: cint32; | 938 | NumThreads: cint32; |
| @@ -920,6 +940,7 @@ type | @@ -920,6 +940,7 @@ type | ||
| 920 | Provider: PAnsiChar; | 940 | Provider: PAnsiChar; |
| 921 | Matcha: SherpaOnnxOfflineTtsMatchaModelConfig; | 941 | Matcha: SherpaOnnxOfflineTtsMatchaModelConfig; |
| 922 | Kokoro: SherpaOnnxOfflineTtsKokoroModelConfig; | 942 | Kokoro: SherpaOnnxOfflineTtsKokoroModelConfig; |
| 943 | + Kitten: SherpaOnnxOfflineTtsKittenModelConfig; | ||
| 923 | end; | 944 | end; |
| 924 | 945 | ||
| 925 | SherpaOnnxOfflineTtsConfig = record | 946 | SherpaOnnxOfflineTtsConfig = record |
| @@ -1340,7 +1361,7 @@ begin | @@ -1340,7 +1361,7 @@ begin | ||
| 1340 | 'ModelType := %s, ' + | 1361 | 'ModelType := %s, ' + |
| 1341 | 'ModelingUnit := %s, ' + | 1362 | 'ModelingUnit := %s, ' + |
| 1342 | 'BpeVocab := %s, ' + | 1363 | 'BpeVocab := %s, ' + |
| 1343 | - 'NemoCtc := %s', | 1364 | + 'NemoCtc := %s)', |
| 1344 | [Self.Transducer.ToString, Self.Paraformer.ToString, | 1365 | [Self.Transducer.ToString, Self.Paraformer.ToString, |
| 1345 | Self.Zipformer2Ctc.ToString, Self.Tokens, | 1366 | Self.Zipformer2Ctc.ToString, Self.Tokens, |
| 1346 | Self.NumThreads, Self.Provider, Self.Debug.ToString, | 1367 | Self.NumThreads, Self.Provider, Self.Debug.ToString, |
| @@ -2298,6 +2319,23 @@ begin | @@ -2298,6 +2319,23 @@ begin | ||
| 2298 | Dest.LengthScale := 1.0; | 2319 | Dest.LengthScale := 1.0; |
| 2299 | end; | 2320 | end; |
| 2300 | 2321 | ||
| 2322 | +function TSherpaOnnxOfflineTtsKittenModelConfig.ToString: AnsiString; | ||
| 2323 | +begin | ||
| 2324 | + Result := Format('TSherpaOnnxOfflineTtsKittenModelConfig(' + | ||
| 2325 | + 'Model := %s, ' + | ||
| 2326 | + 'Voices := %s, ' + | ||
| 2327 | + 'Tokens := %s, ' + | ||
| 2328 | + 'DataDir := %s, ' + | ||
| 2329 | + 'LengthScale := %.2f' + | ||
| 2330 | + ')', | ||
| 2331 | + [Self.Model, Self.Voices, Self.Tokens, Self.DataDir, Self.LengthScale]); | ||
| 2332 | +end; | ||
| 2333 | + | ||
| 2334 | +class operator TSherpaOnnxOfflineTtsKittenModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsKittenModelConfig); | ||
| 2335 | +begin | ||
| 2336 | + Dest.LengthScale := 1.0; | ||
| 2337 | +end; | ||
| 2338 | + | ||
| 2301 | function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString; | 2339 | function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString; |
| 2302 | begin | 2340 | begin |
| 2303 | Result := Format('TSherpaOnnxOfflineTtsModelConfig(' + | 2341 | Result := Format('TSherpaOnnxOfflineTtsModelConfig(' + |
| @@ -2306,10 +2344,11 @@ begin | @@ -2306,10 +2344,11 @@ begin | ||
| 2306 | 'Debug := %s, ' + | 2344 | 'Debug := %s, ' + |
| 2307 | 'Provider := %s, ' + | 2345 | 'Provider := %s, ' + |
| 2308 | 'Matcha := %s, ' + | 2346 | 'Matcha := %s, ' + |
| 2309 | - 'Kokoro := %s' + | 2347 | + 'Kokoro := %s, ' + |
| 2348 | + 'Kitten := %s' + | ||
| 2310 | ')', | 2349 | ')', |
| 2311 | [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider, | 2350 | [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider, |
| 2312 | - Self.Matcha.ToString, Self.Kokoro.ToString | 2351 | + Self.Matcha.ToString, Self.Kokoro.ToString, Self.Kitten.ToString |
| 2313 | ]); | 2352 | ]); |
| 2314 | end; | 2353 | end; |
| 2315 | 2354 | ||
| @@ -2373,6 +2412,12 @@ begin | @@ -2373,6 +2412,12 @@ begin | ||
| 2373 | C.Model.Kokoro.Lexicon := PAnsiChar(Config.Model.Kokoro.Lexicon); | 2412 | C.Model.Kokoro.Lexicon := PAnsiChar(Config.Model.Kokoro.Lexicon); |
| 2374 | C.Model.Kokoro.Lang := PAnsiChar(Config.Model.Kokoro.Lang); | 2413 | C.Model.Kokoro.Lang := PAnsiChar(Config.Model.Kokoro.Lang); |
| 2375 | 2414 | ||
| 2415 | + C.Model.Kitten.Model := PAnsiChar(Config.Model.Kitten.Model); | ||
| 2416 | + C.Model.Kitten.Voices := PAnsiChar(Config.Model.Kitten.Voices); | ||
| 2417 | + C.Model.Kitten.Tokens := PAnsiChar(Config.Model.Kitten.Tokens); | ||
| 2418 | + C.Model.Kitten.DataDir := PAnsiChar(Config.Model.Kitten.DataDir); | ||
| 2419 | + C.Model.Kitten.LengthScale := Config.Model.Kitten.LengthScale; | ||
| 2420 | + | ||
| 2376 | C.Model.NumThreads := Config.Model.NumThreads; | 2421 | C.Model.NumThreads := Config.Model.NumThreads; |
| 2377 | C.Model.Provider := PAnsiChar(Config.Model.Provider); | 2422 | C.Model.Provider := PAnsiChar(Config.Model.Provider); |
| 2378 | C.Model.Debug := Ord(Config.Model.Debug); | 2423 | C.Model.Debug := Ord(Config.Model.Debug); |
-
请 注册 或 登录 后发表评论