Committed by
GitHub
Text to speech API for Object Pascal. (#1273)
正在显示
14 个修改的文件
包含
905 行增加
和
22 行删除
| @@ -119,13 +119,29 @@ jobs: | @@ -119,13 +119,29 @@ jobs: | ||
| 119 | cp -v install/lib/*.dll ../pascal-api-examples/vad | 119 | cp -v install/lib/*.dll ../pascal-api-examples/vad |
| 120 | cp -v install/lib/*.dll ../pascal-api-examples/vad-with-non-streaming-asr | 120 | cp -v install/lib/*.dll ../pascal-api-examples/vad-with-non-streaming-asr |
| 121 | 121 | ||
| 122 | - cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav | ||
| 123 | - cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr | ||
| 124 | - cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/non-streaming-asr | ||
| 125 | - cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/vad | ||
| 126 | - cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/vad-with-non-streaming-asr | 122 | + cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/read-wav |
| 123 | + cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/streaming-asr | ||
| 124 | + cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/non-streaming-asr | ||
| 125 | + cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad | ||
| 126 | + cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/vad-with-non-streaming-asr | ||
| 127 | + cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts | ||
| 127 | fi | 128 | fi |
| 128 | 129 | ||
| 130 | + - name: Run Pascal test (TTS) | ||
| 131 | + shell: bash | ||
| 132 | + run: | | ||
| 133 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 134 | + | ||
| 135 | + cd ./pascal-api-examples | ||
| 136 | + pushd tts | ||
| 137 | + | ||
| 138 | + ./run-piper.sh | ||
| 139 | + rm -rf vits-piper-* | ||
| 140 | + ls -lh | ||
| 141 | + echo "---" | ||
| 142 | + | ||
| 143 | + popd | ||
| 144 | + | ||
| 129 | - name: Run Pascal test (VAD + non-streaming ASR) | 145 | - name: Run Pascal test (VAD + non-streaming ASR) |
| 130 | shell: bash | 146 | shell: bash |
| 131 | run: | | 147 | run: | |
pascal-api-examples/.gitignore
0 → 100644
| 1 | +link*.res |
| @@ -13,3 +13,5 @@ https://k2-fsa.github.io/sherpa/onnx/pascal-api/index.html | @@ -13,3 +13,5 @@ https://k2-fsa.github.io/sherpa/onnx/pascal-api/index.html | ||
| 13 | |[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.| | 13 | |[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.| |
| 14 | |[vad](./vad)| It shows how to use the voice activity detection API.| | 14 | |[vad](./vad)| It shows how to use the voice activity detection API.| |
| 15 | |[vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| It shows how to use the voice activity detection API with non-streaming models for speech recognition.| | 15 | |[vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| It shows how to use the voice activity detection API with non-streaming models for speech recognition.| |
| 16 | +|[portaudio-test](./portaudio-test)| It shows how to use PortAudio for recording and playing.| | ||
| 17 | +|[tts](./tts)| It shows how to use the text-to-speech API.| |
pascal-api-examples/tts/.gitignore
0 → 100644
pascal-api-examples/tts/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +This directory contains examples showing how to use the TTS (text-to-speech) APIs. | ||
| 4 | + | ||
| 5 | +|File| Description| | ||
| 6 | +|---------|------------| | ||
| 7 | +|[run-piper.sh](./run-piper.sh)|It shows how to use models from [piper](https://github.com/rhasspy/piper) for text to speech.| | ||
| 8 | +|[run-piper-playback.sh](./run-piper-playback.sh)|It shows how to use models from [piper](https://github.com/rhasspy/piper) for text to speech. It plays the audio back while it is still being generated. | | ||
| 9 | + |
pascal-api-examples/tts/piper-playback.pas
0 → 100644
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | +program piper; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with Piper models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +Note that it plays the audio back while it is still being generated. | ||
| 10 | +} | ||
| 11 | + | ||
| 12 | +{$mode objfpc} | ||
| 13 | + | ||
| 14 | +uses | ||
| 15 | + {$ifdef unix} | ||
| 16 | + cthreads, | ||
| 17 | + {$endif} | ||
| 18 | + SysUtils, | ||
| 19 | + dos, | ||
| 20 | + ctypes, | ||
| 21 | + portaudio, | ||
| 22 | + sherpa_onnx; | ||
| 23 | + | ||
| 24 | +var | ||
| 25 | + CriticalSection: TRTLCriticalSection; | ||
| 26 | + | ||
| 27 | + Tts: TSherpaOnnxOfflineTts; | ||
| 28 | + Audio: TSherpaOnnxGeneratedAudio; | ||
| 29 | + Resampler: TSherpaOnnxLinearResampler; | ||
| 30 | + | ||
| 31 | + Text: AnsiString; | ||
| 32 | + Speed: Single = 1.0; {Use a larger value to speak faster} | ||
| 33 | + SpeakerId: Integer = 0; | ||
| 34 | + Buffer: TSherpaOnnxCircularBuffer; | ||
| 35 | + FinishedGeneration: Boolean = False; | ||
| 36 | + FinishedPlaying: Boolean = False; | ||
| 37 | + | ||
| 38 | + Version: String; | ||
| 39 | + EnvStr: String; | ||
| 40 | + Status: Integer; | ||
| 41 | + NumDevices: Integer; | ||
| 42 | + DeviceIndex: Integer; | ||
| 43 | + DeviceInfo: PPaDeviceInfo; | ||
| 44 | + | ||
| 45 | + { If you get EDivByZero: Division by zero error, please change the sample rate | ||
| 46 | + to the one supported by your output device (speaker). | ||
| 47 | + } | ||
| 48 | + DeviceSampleRate: Integer = 48000; | ||
| 49 | + I: Integer; | ||
| 50 | + Param: TPaStreamParameters; | ||
| 51 | + Stream: PPaStream; | ||
| 52 | + Wave: TSherpaOnnxWave; | ||
| 53 | + | ||
| 54 | +function GenerateCallback( | ||
| 55 | + Samples: pcfloat; N: cint32; | ||
| 56 | + Arg: Pointer): cint; cdecl; | ||
| 57 | +begin | ||
| 58 | + EnterCriticalSection(CriticalSection); | ||
| 59 | + try | ||
| 60 | + if Resampler <> nil then | ||
| 61 | + Buffer.Push(Resampler.Resample(Samples, N, False)) | ||
| 62 | + else | ||
| 63 | + Buffer.Push(Samples, N); | ||
| 64 | + finally | ||
| 65 | + LeaveCriticalSection(CriticalSection); | ||
| 66 | + end; | ||
| 67 | + | ||
| 68 | + { 1 means to continue generating; 0 means to stop generating. } | ||
| 69 | + Result := 1; | ||
| 70 | +end; | ||
| 71 | + | ||
| 72 | +function PlayCallback( | ||
| 73 | + input: Pointer; output: Pointer; | ||
| 74 | + frameCount: culong; | ||
| 75 | + timeInfo: PPaStreamCallbackTimeInfo; | ||
| 76 | + statusFlags: TPaStreamCallbackFlags; | ||
| 77 | + userData: Pointer ): cint; cdecl; | ||
| 78 | +var | ||
| 79 | + Samples: TSherpaOnnxSamplesArray; | ||
| 80 | + I: Integer; | ||
| 81 | +begin | ||
| 82 | + EnterCriticalSection(CriticalSection); | ||
| 83 | + try | ||
| 84 | + if Buffer.Size >= frameCount then | ||
| 85 | + begin | ||
| 86 | + Samples := Buffer.Get(Buffer.Head, FrameCount); | ||
| 87 | + Buffer.Pop(FrameCount); | ||
| 88 | + end | ||
| 89 | + else if Buffer.Size > 0 then | ||
| 90 | + begin | ||
| 91 | + Samples := Buffer.Get(Buffer.Head, Buffer.Size); | ||
| 92 | + Buffer.Pop(Buffer.Size); | ||
| 93 | + SetLength(Samples, frameCount); | ||
| 94 | + end | ||
| 95 | + else | ||
| 96 | + SetLength(Samples, frameCount); | ||
| 97 | + | ||
| 98 | + for I := 0 to frameCount - 1 do | ||
| 99 | + pcfloat(output)[I] := Samples[I]; | ||
| 100 | + | ||
| 101 | + if (Buffer.Size > 0) or (not FinishedGeneration) then | ||
| 102 | + Result := paContinue | ||
| 103 | + else | ||
| 104 | + begin | ||
| 105 | + Result := paComplete; | ||
| 106 | + FinishedPlaying := True; | ||
| 107 | + end; | ||
| 108 | + finally | ||
| 109 | + LeaveCriticalSection(CriticalSection); | ||
| 110 | + end; | ||
| 111 | +end; | ||
| 112 | + | ||
| 113 | +function GetOfflineTts: TSherpaOnnxOfflineTts; | ||
| 114 | +var | ||
| 115 | + Config: TSherpaOnnxOfflineTtsConfig; | ||
| 116 | +begin | ||
| 117 | + Config.Model.Vits.Model := './vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx'; | ||
| 118 | + Config.Model.Vits.Tokens := './vits-piper-en_US-libritts_r-medium/tokens.txt'; | ||
| 119 | + Config.Model.Vits.DataDir := './vits-piper-en_US-libritts_r-medium/espeak-ng-data'; | ||
| 120 | + Config.Model.NumThreads := 1; | ||
| 121 | + Config.Model.Debug := False; | ||
| 122 | + Config.MaxNumSentences := 1; | ||
| 123 | + | ||
| 124 | + Result := TSherpaOnnxOfflineTts.Create(Config); | ||
| 125 | +end; | ||
| 126 | + | ||
| 127 | +begin | ||
| 128 | + Tts := GetOfflineTts; | ||
| 129 | + if Tts.GetSampleRate <> DeviceSampleRate then | ||
| 130 | + Resampler := TSherpaOnnxLinearResampler.Create(Tts.GetSampleRate, DeviceSampleRate); | ||
| 131 | + | ||
| 132 | + Version := String(Pa_GetVersionText); | ||
| 133 | + WriteLn('Version is ', Version); | ||
| 134 | + Status := Pa_Initialize; | ||
| 135 | + if Status <> paNoError then | ||
| 136 | + begin | ||
| 137 | + WriteLn('Failed to initialize portaudio, ', Pa_GetErrorText(Status)); | ||
| 138 | + Exit; | ||
| 139 | + end; | ||
| 140 | + | ||
| 141 | + NumDevices := Pa_GetDeviceCount; | ||
| 142 | + WriteLn('Num devices: ', NumDevices); | ||
| 143 | + | ||
| 144 | + DeviceIndex := Pa_GetDefaultOutputDevice; | ||
| 145 | + | ||
| 146 | + if DeviceIndex = paNoDevice then | ||
| 147 | + begin | ||
| 148 | + WriteLn('No default output device found'); | ||
| 149 | + Pa_Terminate; | ||
| 150 | + Exit; | ||
| 151 | + end; | ||
| 152 | + | ||
| 153 | + EnvStr := GetEnv('SHERPA_ONNX_MIC_DEVICE'); | ||
| 154 | + if EnvStr <> '' then | ||
| 155 | + begin | ||
| 156 | + DeviceIndex := StrToIntDef(EnvStr, DeviceIndex); | ||
| 157 | + WriteLn('Use device index from environment variable SHERPA_ONNX_MIC_DEVICE: ', EnvStr); | ||
| 158 | + end; | ||
| 159 | + | ||
| 160 | + for I := 0 to (NumDevices - 1) do | ||
| 161 | + begin | ||
| 162 | + DeviceInfo := Pa_GetDeviceInfo(I); | ||
| 163 | + if I = DeviceIndex then | ||
| 164 | + { WriteLn(Format(' * %d %s', [I, DeviceInfo^.Name])) } | ||
| 165 | + WriteLn(Format(' * %d %s', [I, AnsiString(DeviceInfo^.Name)])) | ||
| 166 | + else | ||
| 167 | + WriteLn(Format(' %d %s', [I, AnsiString(DeviceInfo^.Name)])); | ||
| 168 | + end; | ||
| 169 | + | ||
| 170 | + WriteLn('Use device ', DeviceIndex); | ||
| 171 | + WriteLn(' Name ', Pa_GetDeviceInfo(DeviceIndex)^.Name); | ||
| 172 | + WriteLn(' Max output channels ', Pa_GetDeviceInfo(DeviceIndex)^.MaxOutputChannels); | ||
| 173 | + | ||
| 174 | + Initialize(Param); | ||
| 175 | + Param.Device := DeviceIndex; | ||
| 176 | + Param.ChannelCount := 1; | ||
| 177 | + Param.SampleFormat := paFloat32; | ||
| 178 | + param.SuggestedLatency := Pa_GetDeviceInfo(DeviceIndex)^.DefaultHighOutputLatency; | ||
| 179 | + param.HostApiSpecificStreamInfo := nil; | ||
| 180 | + | ||
| 181 | + Buffer := TSherpaOnnxCircularBuffer.Create(30 * DeviceSampleRate); | ||
| 182 | + | ||
| 183 | + | ||
| 184 | + { Note(fangjun): PortAudio invokes PlayCallback in a separate thread. } | ||
| 185 | + Status := Pa_OpenStream(stream, nil, @Param, DeviceSampleRate, paFramesPerBufferUnspecified, paNoFlag, | ||
| 186 | + PPaStreamCallback(@PlayCallback), nil); | ||
| 187 | + | ||
| 188 | + if Status <> paNoError then | ||
| 189 | + begin | ||
| 190 | + WriteLn('Failed to open stream, ', Pa_GetErrorText(Status)); | ||
| 191 | + Pa_Terminate; | ||
| 192 | + Exit; | ||
| 193 | + end; | ||
| 194 | + | ||
| 195 | + InitCriticalSection(CriticalSection); | ||
| 196 | + | ||
| 197 | + Status := Pa_StartStream(stream); | ||
| 198 | + if Status <> paNoError then | ||
| 199 | + begin | ||
| 200 | + WriteLn('Failed to start stream, ', Pa_GetErrorText(Status)); | ||
| 201 | + Pa_Terminate; | ||
| 202 | + Exit; | ||
| 203 | + end; | ||
| 204 | + | ||
| 205 | + WriteLn('There are ', Tts.GetNumSpeakers, ' speakers'); | ||
| 206 | + | ||
| 207 | + Text := 'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'; | ||
| 208 | + | ||
| 209 | + Audio := Tts.Generate(Text, SpeakerId, Speed, | ||
| 210 | + PSherpaOnnxGeneratedAudioCallbackWithArg(@GenerateCallback), nil); | ||
| 211 | + FinishedGeneration := True; | ||
| 212 | + SherpaOnnxWriteWave('./libritts_r-generated.wav', Audio.Samples, Audio.SampleRate); | ||
| 213 | + WriteLn('Saved to ./libritts_r-generated.wav'); | ||
| 214 | + | ||
| 215 | + while not FinishedPlaying do | ||
| 216 | + Pa_Sleep(100); {sleep for 0.1 second } | ||
| 217 | + {TODO(fangjun): Use an event to indicate the play is finished} | ||
| 218 | + | ||
| 219 | + DoneCriticalSection(CriticalSection); | ||
| 220 | + | ||
| 221 | + FreeAndNil(Tts); | ||
| 222 | + FreeAndNil(Resampler); | ||
| 223 | + | ||
| 224 | + Status := Pa_CloseStream(stream); | ||
| 225 | + if Status <> paNoError then | ||
| 226 | + begin | ||
| 227 | + WriteLn('Failed to close stream, ', Pa_GetErrorText(Status)); | ||
| 228 | + Exit; | ||
| 229 | + end; | ||
| 230 | + | ||
| 231 | + Status := Pa_Terminate; | ||
| 232 | + if Status <> paNoError then | ||
| 233 | + begin | ||
| 234 | + WriteLn('Failed to deinitialize portaudio, ', Pa_GetErrorText(Status)); | ||
| 235 | + Exit; | ||
| 236 | + end; | ||
| 237 | +end. | ||
| 238 | + |
pascal-api-examples/tts/piper.pas
0 → 100644
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | +program piper; | ||
| 3 | +{ | ||
| 4 | +This file shows how to use the text to speech API of sherpa-onnx | ||
| 5 | +with Piper models. | ||
| 6 | + | ||
| 7 | +It generates speech from text and saves it to a wave file. | ||
| 8 | + | ||
| 9 | +If you want to play it while it is generating, please see | ||
| 10 | +./piper-playback.pas | ||
| 11 | +} | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + SysUtils, | ||
| 17 | + sherpa_onnx; | ||
| 18 | + | ||
| 19 | +function GetOfflineTts: TSherpaOnnxOfflineTts; | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxOfflineTtsConfig; | ||
| 22 | +begin | ||
| 23 | + Config.Model.Vits.Model := './vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx'; | ||
| 24 | + Config.Model.Vits.Tokens := './vits-piper-en_US-libritts_r-medium/tokens.txt'; | ||
| 25 | + Config.Model.Vits.DataDir := './vits-piper-en_US-libritts_r-medium/espeak-ng-data'; | ||
| 26 | + Config.Model.NumThreads := 1; | ||
| 27 | + Config.Model.Debug := False; | ||
| 28 | + Config.MaxNumSentences := 1; | ||
| 29 | + | ||
| 30 | + Result := TSherpaOnnxOfflineTts.Create(Config); | ||
| 31 | +end; | ||
| 32 | + | ||
| 33 | +var | ||
| 34 | + Tts: TSherpaOnnxOfflineTts; | ||
| 35 | + Audio: TSherpaOnnxGeneratedAudio; | ||
| 36 | + | ||
| 37 | + Text: AnsiString; | ||
| 38 | + Speed: Single = 1.0; {Use a larger value to speak faster} | ||
| 39 | + SpeakerId: Integer = 0; | ||
| 40 | + | ||
| 41 | +begin | ||
| 42 | + Tts := GetOfflineTts; | ||
| 43 | + | ||
| 44 | + WriteLn('There are ', Tts.GetNumSpeakers, ' speakers'); | ||
| 45 | + | ||
| 46 | + Text := 'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.'; | ||
| 47 | + | ||
| 48 | + Audio := Tts.Generate(Text, SpeakerId, Speed); | ||
| 49 | + SherpaOnnxWriteWave('./libritts_r-generated.wav', Audio.Samples, Audio.SampleRate); | ||
| 50 | + WriteLn('Saved to ./libritts_r-generated.wav'); | ||
| 51 | + | ||
| 52 | + FreeAndNil(Tts); | ||
| 53 | +end. | ||
| 54 | + |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + popd | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then | ||
| 27 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 28 | + tar xf vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 29 | + rm vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 30 | +fi | ||
| 31 | + | ||
| 32 | +fpc \ | ||
| 33 | + -dSHERPA_ONNX_USE_SHARED_LIBS \ | ||
| 34 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 35 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 36 | + -Fl/usr/local/Cellar/portaudio/19.7.0/lib \ | ||
| 37 | + ./piper-playback.pas | ||
| 38 | + | ||
| 39 | +# Please see ../portaudio-test/README.md | ||
| 40 | +# for how to install portaudio on macOS | ||
| 41 | + | ||
| 42 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 43 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 44 | + | ||
| 45 | +./piper-playback |
pascal-api-examples/tts/run-piper.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + popd | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +if [[ ! -f ./vits-piper-en_US-libritts_r-medium/tokens.txt ]]; then | ||
| 27 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 28 | + tar xf vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 29 | + rm vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 30 | +fi | ||
| 31 | + | ||
| 32 | +fpc \ | ||
| 33 | + -dSHERPA_ONNX_USE_SHARED_LIBS \ | ||
| 34 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 35 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 36 | + ./piper.pas | ||
| 37 | + | ||
| 38 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 39 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 40 | + | ||
| 41 | +./piper |
| @@ -190,9 +190,9 @@ def get_piper_models() -> List[TtsModel]: | @@ -190,9 +190,9 @@ def get_piper_models() -> List[TtsModel]: | ||
| 190 | TtsModel(model_dir="vits-piper-nl_BE-nathalie-x_low"), | 190 | TtsModel(model_dir="vits-piper-nl_BE-nathalie-x_low"), |
| 191 | TtsModel(model_dir="vits-piper-nl_BE-rdh-medium"), | 191 | TtsModel(model_dir="vits-piper-nl_BE-rdh-medium"), |
| 192 | TtsModel(model_dir="vits-piper-nl_BE-rdh-x_low"), | 192 | TtsModel(model_dir="vits-piper-nl_BE-rdh-x_low"), |
| 193 | - TtsModel(model_dir="vits-piper-nl_NL-mls-medium"), | ||
| 194 | - TtsModel(model_dir="vits-piper-nl_NL-mls_5809-low"), | ||
| 195 | - TtsModel(model_dir="vits-piper-nl_NL-mls_7432-low"), | 193 | + # TtsModel(model_dir="vits-piper-nl_NL-mls-medium"), |
| 194 | + # TtsModel(model_dir="vits-piper-nl_NL-mls_5809-low"), | ||
| 195 | + # TtsModel(model_dir="vits-piper-nl_NL-mls_7432-low"), | ||
| 196 | TtsModel(model_dir="vits-piper-no_NO-talesyntese-medium"), | 196 | TtsModel(model_dir="vits-piper-no_NO-talesyntese-medium"), |
| 197 | TtsModel(model_dir="vits-piper-pl_PL-darkman-medium"), | 197 | TtsModel(model_dir="vits-piper-pl_PL-darkman-medium"), |
| 198 | TtsModel(model_dir="vits-piper-pl_PL-gosia-medium"), | 198 | TtsModel(model_dir="vits-piper-pl_PL-gosia-medium"), |
| @@ -180,9 +180,9 @@ def get_piper_models() -> List[TtsModel]: | @@ -180,9 +180,9 @@ def get_piper_models() -> List[TtsModel]: | ||
| 180 | TtsModel(model_dir="vits-piper-nl_BE-nathalie-x_low"), | 180 | TtsModel(model_dir="vits-piper-nl_BE-nathalie-x_low"), |
| 181 | TtsModel(model_dir="vits-piper-nl_BE-rdh-medium"), | 181 | TtsModel(model_dir="vits-piper-nl_BE-rdh-medium"), |
| 182 | TtsModel(model_dir="vits-piper-nl_BE-rdh-x_low"), | 182 | TtsModel(model_dir="vits-piper-nl_BE-rdh-x_low"), |
| 183 | - TtsModel(model_dir="vits-piper-nl_NL-mls-medium"), | ||
| 184 | - TtsModel(model_dir="vits-piper-nl_NL-mls_5809-low"), | ||
| 185 | - TtsModel(model_dir="vits-piper-nl_NL-mls_7432-low"), | 183 | + # TtsModel(model_dir="vits-piper-nl_NL-mls-medium"), |
| 184 | + # TtsModel(model_dir="vits-piper-nl_NL-mls_5809-low"), | ||
| 185 | + # TtsModel(model_dir="vits-piper-nl_NL-mls_7432-low"), | ||
| 186 | TtsModel(model_dir="vits-piper-no_NO-talesyntese-medium"), | 186 | TtsModel(model_dir="vits-piper-no_NO-talesyntese-medium"), |
| 187 | TtsModel(model_dir="vits-piper-pl_PL-darkman-medium"), | 187 | TtsModel(model_dir="vits-piper-pl_PL-darkman-medium"), |
| 188 | TtsModel(model_dir="vits-piper-pl_PL-gosia-medium"), | 188 | TtsModel(model_dir="vits-piper-pl_PL-gosia-medium"), |
| @@ -18,6 +18,7 @@ | @@ -18,6 +18,7 @@ | ||
| 18 | #include "sherpa-onnx/csrc/offline-punctuation.h" | 18 | #include "sherpa-onnx/csrc/offline-punctuation.h" |
| 19 | #include "sherpa-onnx/csrc/offline-recognizer.h" | 19 | #include "sherpa-onnx/csrc/offline-recognizer.h" |
| 20 | #include "sherpa-onnx/csrc/online-recognizer.h" | 20 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| 21 | +#include "sherpa-onnx/csrc/resample.h" | ||
| 21 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | 22 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" |
| 22 | #include "sherpa-onnx/csrc/speaker-embedding-manager.h" | 23 | #include "sherpa-onnx/csrc/speaker-embedding-manager.h" |
| 23 | #include "sherpa-onnx/csrc/spoken-language-identification.h" | 24 | #include "sherpa-onnx/csrc/spoken-language-identification.h" |
| @@ -1584,3 +1585,56 @@ const char *SherpaOfflinePunctuationAddPunct( | @@ -1584,3 +1585,56 @@ const char *SherpaOfflinePunctuationAddPunct( | ||
| 1584 | } | 1585 | } |
| 1585 | 1586 | ||
| 1586 | void SherpaOfflinePunctuationFreeText(const char *text) { delete[] text; } | 1587 | void SherpaOfflinePunctuationFreeText(const char *text) { delete[] text; } |
| 1588 | + | ||
| 1589 | +struct SherpaOnnxLinearResampler { | ||
| 1590 | + std::unique_ptr<sherpa_onnx::LinearResample> impl; | ||
| 1591 | +}; | ||
| 1592 | + | ||
| 1593 | +SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( | ||
| 1594 | + int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, | ||
| 1595 | + int32_t num_zeros) { | ||
| 1596 | + SherpaOnnxLinearResampler *p = new SherpaOnnxLinearResampler; | ||
| 1597 | + p->impl = std::make_unique<sherpa_onnx::LinearResample>( | ||
| 1598 | + samp_rate_in_hz, samp_rate_out_hz, filter_cutoff_hz, num_zeros); | ||
| 1599 | + | ||
| 1600 | + return p; | ||
| 1601 | +} | ||
| 1602 | + | ||
| 1603 | +void SherpaOnnxDestroyLinearResampler(SherpaOnnxLinearResampler *p) { | ||
| 1604 | + delete p; | ||
| 1605 | +} | ||
| 1606 | + | ||
| 1607 | +const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample( | ||
| 1608 | + SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, | ||
| 1609 | + int32_t flush) { | ||
| 1610 | + std::vector<float> o; | ||
| 1611 | + p->impl->Resample(input, input_dim, flush, &o); | ||
| 1612 | + | ||
| 1613 | + float *s = new float[o.size()]; | ||
| 1614 | + std::copy(o.begin(), o.end(), s); | ||
| 1615 | + | ||
| 1616 | + SherpaOnnxResampleOut *ans = new SherpaOnnxResampleOut; | ||
| 1617 | + ans->samples = s; | ||
| 1618 | + ans->n = static_cast<int32_t>(o.size()); | ||
| 1619 | + | ||
| 1620 | + return ans; | ||
| 1621 | +} | ||
| 1622 | + | ||
| 1623 | +void SherpaOnnxLinearResamplerResampleFree(const SherpaOnnxResampleOut *p) { | ||
| 1624 | + delete[] p->samples; | ||
| 1625 | + delete p; | ||
| 1626 | +} | ||
| 1627 | + | ||
| 1628 | +int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate( | ||
| 1629 | + const SherpaOnnxLinearResampler *p) { | ||
| 1630 | + return p->impl->GetInputSamplingRate(); | ||
| 1631 | +} | ||
| 1632 | + | ||
| 1633 | +int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( | ||
| 1634 | + const SherpaOnnxLinearResampler *p) { | ||
| 1635 | + return p->impl->GetOutputSamplingRate(); | ||
| 1636 | +} | ||
| 1637 | + | ||
| 1638 | +void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) { | ||
| 1639 | + p->impl->Reset(); | ||
| 1640 | +} |
| @@ -1315,6 +1315,52 @@ SHERPA_ONNX_API const char *SherpaOfflinePunctuationAddPunct( | @@ -1315,6 +1315,52 @@ SHERPA_ONNX_API const char *SherpaOfflinePunctuationAddPunct( | ||
| 1315 | 1315 | ||
| 1316 | SHERPA_ONNX_API void SherpaOfflinePunctuationFreeText(const char *text); | 1316 | SHERPA_ONNX_API void SherpaOfflinePunctuationFreeText(const char *text); |
| 1317 | 1317 | ||
| 1318 | +// for resampling | ||
| 1319 | +SHERPA_ONNX_API typedef struct SherpaOnnxLinearResampler | ||
| 1320 | + SherpaOnnxLinearResampler; | ||
| 1321 | + | ||
| 1322 | +/* | ||
| 1323 | + float min_freq = min(samp_rate_in_hz, samp_rate_out_hz); | ||
| 1324 | + float lowpass_cutoff = 0.99 * 0.5 * min_freq; | ||
| 1325 | + int32_t lowpass_filter_width = 6; | ||
| 1326 | + | ||
| 1327 | + You can set filter_cutoff_hz to lowpass_cutoff | ||
| 1328 | + and set num_zeros to lowpass_filter_width | ||
| 1329 | +*/ | ||
| 1330 | +// The user has to invoke SherpaOnnxDestroyLinearResampler() | ||
| 1331 | +// to free the returned pointer to avoid memory leak | ||
| 1332 | +SHERPA_ONNX_API SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( | ||
| 1333 | + int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, | ||
| 1334 | + int32_t num_zeros); | ||
| 1335 | + | ||
| 1336 | +SHERPA_ONNX_API void SherpaOnnxDestroyLinearResampler( | ||
| 1337 | + SherpaOnnxLinearResampler *p); | ||
| 1338 | + | ||
| 1339 | +SHERPA_ONNX_API void SherpaOnnxLinearResamplerReset( | ||
| 1340 | + SherpaOnnxLinearResampler *p); | ||
| 1341 | + | ||
| 1342 | +typedef struct SherpaOnnxResampleOut { | ||
| 1343 | + const float *samples; | ||
| 1344 | + int32_t n; | ||
| 1345 | +} SherpaOnnxResampleOut; | ||
| 1346 | +// The user has to invoke SherpaOnnxLinearResamplerResampleFree() | ||
| 1347 | +// to free the returned pointer to avoid memory leak. | ||
| 1348 | +// | ||
| 1349 | +// If this is the last segment, you can set flush to 1; otherwise, please | ||
| 1350 | +// set flush to 0 | ||
| 1351 | +SHERPA_ONNX_API const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample( | ||
| 1352 | + SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, | ||
| 1353 | + int32_t flush); | ||
| 1354 | + | ||
| 1355 | +SHERPA_ONNX_API void SherpaOnnxLinearResamplerResampleFree( | ||
| 1356 | + const SherpaOnnxResampleOut *p); | ||
| 1357 | + | ||
| 1358 | +SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate( | ||
| 1359 | + const SherpaOnnxLinearResampler *p); | ||
| 1360 | + | ||
| 1361 | +SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( | ||
| 1362 | + const SherpaOnnxLinearResampler *p); | ||
| 1363 | + | ||
| 1318 | #if defined(__GNUC__) | 1364 | #if defined(__GNUC__) |
| 1319 | #pragma GCC diagnostic pop | 1365 | #pragma GCC diagnostic pop |
| 1320 | #endif | 1366 | #endif |
| 1 | -{ Copyright (c) 2024 Xiaomi Corporation } | 1 | +{ Copyright (c) 2024 Xiaomi Corporation |
| 2 | + | ||
| 3 | +Please see | ||
| 4 | +https://github.com/k2-fsa/sherpa-onnx/tree/master/pascal-api-examples | ||
| 5 | +for how to use APIs in this file. | ||
| 6 | +} | ||
| 2 | 7 | ||
| 3 | unit sherpa_onnx; | 8 | unit sherpa_onnx; |
| 4 | 9 | ||
| @@ -7,13 +12,105 @@ unit sherpa_onnx; | @@ -7,13 +12,105 @@ unit sherpa_onnx; | ||
| 7 | {$modeSwitch advancedRecords} { to support records with methods } | 12 | {$modeSwitch advancedRecords} { to support records with methods } |
| 8 | {$ENDIF} | 13 | {$ENDIF} |
| 9 | 14 | ||
| 10 | -(* {$LongStrings ON} *) | 15 | +{$LongStrings ON} |
| 11 | 16 | ||
| 12 | interface | 17 | interface |
| 13 | uses | 18 | uses |
| 14 | ctypes; | 19 | ctypes; |
| 15 | 20 | ||
| 16 | type | 21 | type |
| 22 | + TSherpaOnnxSamplesArray = array of Single; | ||
| 23 | + | ||
| 24 | + TSherpaOnnxLinearResampler = class | ||
| 25 | + private | ||
| 26 | + Handle: Pointer; | ||
| 27 | + InputSampleRate: Integer; | ||
| 28 | + OutputSampleRate: Integer; | ||
| 29 | + public | ||
| 30 | + constructor Create(SampleRateIn: Integer; SampleRateOut: Integer); | ||
| 31 | + destructor Destroy; override; | ||
| 32 | + | ||
| 33 | + function Resample(Samples: pcfloat; | ||
| 34 | + N: Integer; Flush: Boolean): TSherpaOnnxSamplesArray; overload; | ||
| 35 | + | ||
| 36 | + function Resample(Samples: array of Single; | ||
| 37 | + Flush: Boolean): TSherpaOnnxSamplesArray; overload; | ||
| 38 | + | ||
| 39 | + procedure Reset; | ||
| 40 | + | ||
| 41 | + property GetInputSampleRate: Integer Read InputSampleRate; | ||
| 42 | + property GetOutputSampleRate: Integer Read OutputSampleRate; | ||
| 43 | + end; | ||
| 44 | + | ||
| 45 | + PSherpaOnnxGeneratedAudioCallbackWithArg = ^TSherpaOnnxGeneratedAudioCallbackWithArg; | ||
| 46 | + | ||
| 47 | + TSherpaOnnxGeneratedAudioCallbackWithArg = function( | ||
| 48 | + Samples: pcfloat; N: cint32; | ||
| 49 | + Arg: Pointer): cint; cdecl; | ||
| 50 | + | ||
| 51 | + TSherpaOnnxOfflineTtsVitsModelConfig = record | ||
| 52 | + Model: AnsiString; | ||
| 53 | + Lexicon: AnsiString; | ||
| 54 | + Tokens: AnsiString; | ||
| 55 | + DataDir: AnsiString; | ||
| 56 | + NoiseScale: Single; | ||
| 57 | + NoiseScaleW: Single; | ||
| 58 | + LengthScale: Single; | ||
| 59 | + DictDir: AnsiString; | ||
| 60 | + | ||
| 61 | + function ToString: AnsiString; | ||
| 62 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig); | ||
| 63 | + end; | ||
| 64 | + | ||
| 65 | + TSherpaOnnxOfflineTtsModelConfig = record | ||
| 66 | + Vits: TSherpaOnnxOfflineTtsVitsModelConfig; | ||
| 67 | + NumThreads: Integer; | ||
| 68 | + Debug: Boolean; | ||
| 69 | + Provider: AnsiString; | ||
| 70 | + | ||
| 71 | + function ToString: AnsiString; | ||
| 72 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); | ||
| 73 | + end; | ||
| 74 | + | ||
| 75 | + TSherpaOnnxOfflineTtsConfig = record | ||
| 76 | + Model: TSherpaOnnxOfflineTtsModelConfig; | ||
| 77 | + RuleFsts: AnsiString; | ||
| 78 | + MaxNumSentences: Integer; | ||
| 79 | + RuleFars: AnsiString; | ||
| 80 | + | ||
| 81 | + function ToString: AnsiString; | ||
| 82 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsConfig); | ||
| 83 | + end; | ||
| 84 | + | ||
| 85 | + TSherpaOnnxGeneratedAudio = record | ||
| 86 | + Samples: array of Single; | ||
| 87 | + SampleRate: Integer; | ||
| 88 | + end; | ||
| 89 | + | ||
| 90 | + TSherpaOnnxOfflineTts = class | ||
| 91 | + private | ||
| 92 | + Handle: Pointer; | ||
| 93 | + SampleRate: Integer; | ||
| 94 | + NumSpeakers: Integer; | ||
| 95 | + _Config: TSherpaOnnxOfflineTtsConfig; | ||
| 96 | + public | ||
| 97 | + constructor Create(Config: TSherpaOnnxOfflineTtsConfig); | ||
| 98 | + destructor Destroy; override; | ||
| 99 | + | ||
| 100 | + function Generate(Text: AnsiString; SpeakerId: Integer; | ||
| 101 | + Speed: Single): TSherpaOnnxGeneratedAudio; overload; | ||
| 102 | + | ||
| 103 | + function Generate(Text: AnsiString; SpeakerId: Integer; | ||
| 104 | + Speed: Single; | ||
| 105 | + Callback:PSherpaOnnxGeneratedAudioCallbackWithArg; | ||
| 106 | + Arg: Pointer | ||
| 107 | + ): TSherpaOnnxGeneratedAudio; overload; | ||
| 108 | + | ||
| 109 | + property GetHandle: Pointer Read Handle; | ||
| 110 | + property GetSampleRate: Integer Read SampleRate; | ||
| 111 | + property GetNumSpeakers: Integer Read NumSpeakers; | ||
| 112 | + end; | ||
| 113 | + | ||
| 17 | TSherpaOnnxWave = record | 114 | TSherpaOnnxWave = record |
| 18 | Samples: array of Single; { normalized to the range [-1, 1] } | 115 | Samples: array of Single; { normalized to the range [-1, 1] } |
| 19 | SampleRate: Integer; | 116 | SampleRate: Integer; |
| @@ -254,7 +351,6 @@ type | @@ -254,7 +351,6 @@ type | ||
| 254 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig); | 351 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxVadModelConfig); |
| 255 | end; | 352 | end; |
| 256 | 353 | ||
| 257 | - TSherpaOnnxSamplesArray = array of Single; | ||
| 258 | 354 | ||
| 259 | TSherpaOnnxCircularBuffer = class | 355 | TSherpaOnnxCircularBuffer = class |
| 260 | private | 356 | private |
| @@ -508,6 +604,94 @@ type | @@ -508,6 +604,94 @@ type | ||
| 508 | 604 | ||
| 509 | PSherpaOnnxSpeechSegment = ^SherpaOnnxSpeechSegment; | 605 | PSherpaOnnxSpeechSegment = ^SherpaOnnxSpeechSegment; |
| 510 | 606 | ||
| 607 | + SherpaOnnxOfflineTtsVitsModelConfig = record | ||
| 608 | + Model: PAnsiChar; | ||
| 609 | + Lexicon: PAnsiChar; | ||
| 610 | + Tokens: PAnsiChar; | ||
| 611 | + DataDir: PAnsiChar; | ||
| 612 | + NoiseScale: cfloat; | ||
| 613 | + NoiseScaleW: cfloat; | ||
| 614 | + LengthScale: cfloat; | ||
| 615 | + DictDir: PAnsiChar; | ||
| 616 | + end; | ||
| 617 | + | ||
| 618 | + SherpaOnnxOfflineTtsModelConfig = record | ||
| 619 | + Vits: SherpaOnnxOfflineTtsVitsModelConfig; | ||
| 620 | + NumThreads: cint32; | ||
| 621 | + Debug: cint32; | ||
| 622 | + Provider: PAnsiChar; | ||
| 623 | + end; | ||
| 624 | + | ||
| 625 | + SherpaOnnxOfflineTtsConfig = record | ||
| 626 | + Model: SherpaOnnxOfflineTtsModelConfig; | ||
| 627 | + RuleFsts: PAnsiChar; | ||
| 628 | + MaxNumSentences: cint32; | ||
| 629 | + RuleFars: PAnsiChar; | ||
| 630 | + end; | ||
| 631 | + | ||
| 632 | + PSherpaOnnxOfflineTtsConfig = ^SherpaOnnxOfflineTtsConfig; | ||
| 633 | + | ||
| 634 | + SherpaOnnxGeneratedAudio = record | ||
| 635 | + Samples: pcfloat; | ||
| 636 | + N: cint32; | ||
| 637 | + SampleRate: cint32; | ||
| 638 | + end; | ||
| 639 | + | ||
| 640 | + PSherpaOnnxGeneratedAudio = ^SherpaOnnxGeneratedAudio; | ||
| 641 | + | ||
| 642 | + SherpaOnnxResampleOut = record | ||
| 643 | + Samples: pcfloat; | ||
| 644 | + N: cint32; | ||
| 645 | + end; | ||
| 646 | + | ||
| 647 | + PSherpaOnnxResampleOut = ^SherpaOnnxResampleOut; | ||
| 648 | + | ||
| 649 | +function SherpaOnnxCreateLinearResampler(SampleRateInHz: cint32; | ||
| 650 | + SampleRateOutHz: cint32; | ||
| 651 | + FilterCutoffHz: cfloat; | ||
| 652 | + NumZeros: cint32): Pointer; cdecl; | ||
| 653 | + external SherpaOnnxLibName; | ||
| 654 | + | ||
| 655 | +procedure SherpaOnnxDestroyLinearResampler(P: Pointer); cdecl; | ||
| 656 | + external SherpaOnnxLibName; | ||
| 657 | + | ||
| 658 | +function SherpaOnnxLinearResamplerResample(P: Pointer; | ||
| 659 | + Samples: pcfloat; | ||
| 660 | + N: Integer; | ||
| 661 | + Flush: Integer): PSherpaOnnxResampleOut; cdecl; | ||
| 662 | + external SherpaOnnxLibName; | ||
| 663 | + | ||
| 664 | +procedure SherpaOnnxLinearResamplerResampleFree(P: PSherpaOnnxResampleOut); cdecl; | ||
| 665 | + external SherpaOnnxLibName; | ||
| 666 | + | ||
| 667 | +procedure SherpaOnnxLinearResamplerReset(P: Pointer); cdecl; | ||
| 668 | + external SherpaOnnxLibName; | ||
| 669 | + | ||
| 670 | +function SherpaOnnxCreateOfflineTts(Config: PSherpaOnnxOfflineTtsConfig): Pointer; cdecl; | ||
| 671 | + external SherpaOnnxLibName; | ||
| 672 | + | ||
| 673 | +procedure SherpaOnnxDestroyOfflineTts(Tts: Pointer); cdecl; | ||
| 674 | + external SherpaOnnxLibName; | ||
| 675 | + | ||
| 676 | +function SherpaOnnxOfflineTtsSampleRate(Tts: Pointer): cint32; cdecl; | ||
| 677 | + external SherpaOnnxLibName; | ||
| 678 | + | ||
| 679 | +function SherpaOnnxOfflineTtsNumSpeakers(Tts: Pointer): cint32; cdecl; | ||
| 680 | + external SherpaOnnxLibName; | ||
| 681 | + | ||
| 682 | +function SherpaOnnxOfflineTtsGenerate(Tts: Pointer; | ||
| 683 | + Text: PAnsiChar; Sid: cint32; Speed: cfloat): PSherpaOnnxGeneratedAudio; cdecl; | ||
| 684 | + external SherpaOnnxLibName; | ||
| 685 | + | ||
| 686 | +function SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(Tts: Pointer; | ||
| 687 | + Text: PAnsiChar; Sid: cint32; Speed: cfloat; | ||
| 688 | + Callback: PSherpaOnnxGeneratedAudioCallbackWithArg; | ||
| 689 | + Arg: Pointer): PSherpaOnnxGeneratedAudio; cdecl; | ||
| 690 | + external SherpaOnnxLibName; | ||
| 691 | + | ||
| 692 | +procedure SherpaOnnxDestroyOfflineTtsGeneratedAudio(Audio: Pointer); cdecl; | ||
| 693 | + external SherpaOnnxLibName; | ||
| 694 | + | ||
| 511 | function SherpaOnnxCreateVoiceActivityDetector(Config: PSherpaOnnxVadModelConfig; | 695 | function SherpaOnnxCreateVoiceActivityDetector(Config: PSherpaOnnxVadModelConfig; |
| 512 | BufferSizeInSeconds: cfloat): Pointer; cdecl; | 696 | BufferSizeInSeconds: cfloat): Pointer; cdecl; |
| 513 | external SherpaOnnxLibName; | 697 | external SherpaOnnxLibName; |
| @@ -793,8 +977,7 @@ constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecogniz | @@ -793,8 +977,7 @@ constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecogniz | ||
| 793 | var | 977 | var |
| 794 | C: SherpaOnnxOnlineRecognizerConfig; | 978 | C: SherpaOnnxOnlineRecognizerConfig; |
| 795 | begin | 979 | begin |
| 796 | - Initialize(C); | ||
| 797 | - | 980 | + C := Default(SherpaOnnxOnlineRecognizerConfig); |
| 798 | C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate; | 981 | C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate; |
| 799 | C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim; | 982 | C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim; |
| 800 | 983 | ||
| @@ -1051,8 +1234,7 @@ constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecogn | @@ -1051,8 +1234,7 @@ constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecogn | ||
| 1051 | var | 1234 | var |
| 1052 | C: SherpaOnnxOfflineRecognizerConfig; | 1235 | C: SherpaOnnxOfflineRecognizerConfig; |
| 1053 | begin | 1236 | begin |
| 1054 | - Initialize(C); | ||
| 1055 | - | 1237 | + C := Default(SherpaOnnxOfflineRecognizerConfig); |
| 1056 | C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate; | 1238 | C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate; |
| 1057 | C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim; | 1239 | C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim; |
| 1058 | 1240 | ||
| @@ -1369,12 +1551,11 @@ end; | @@ -1369,12 +1551,11 @@ end; | ||
| 1369 | 1551 | ||
| 1370 | constructor TSherpaOnnxVoiceActivityDetector.Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single); | 1552 | constructor TSherpaOnnxVoiceActivityDetector.Create(Config: TSherpaOnnxVadModelConfig; BufferSizeInSeconds: Single); |
| 1371 | var | 1553 | var |
| 1372 | - C: SherpaOnnxVadModelConfig; | 1554 | + C: SherpaOnnxVadModelConfig ; |
| 1373 | begin | 1555 | begin |
| 1556 | + C := Default(SherpaOnnxVadModelConfig); | ||
| 1374 | Self._Config := Config; | 1557 | Self._Config := Config; |
| 1375 | 1558 | ||
| 1376 | - Initialize(C); | ||
| 1377 | - | ||
| 1378 | C.SileroVad.Model := PAnsiChar(Config.SileroVad.Model); | 1559 | C.SileroVad.Model := PAnsiChar(Config.SileroVad.Model); |
| 1379 | C.SileroVad.Threshold := Config.SileroVad.Threshold; | 1560 | C.SileroVad.Threshold := Config.SileroVad.Threshold; |
| 1380 | C.SileroVad.MinSilenceDuration := Config.SileroVad.MinSilenceDuration; | 1561 | C.SileroVad.MinSilenceDuration := Config.SileroVad.MinSilenceDuration; |
| @@ -1460,5 +1641,197 @@ begin | @@ -1460,5 +1641,197 @@ begin | ||
| 1460 | SherpaOnnxVoiceActivityDetectorFlush(Self.Handle); | 1641 | SherpaOnnxVoiceActivityDetectorFlush(Self.Handle); |
| 1461 | end; | 1642 | end; |
| 1462 | 1643 | ||
| 1463 | -end. | 1644 | +function TSherpaOnnxOfflineTtsVitsModelConfig.ToString: AnsiString; |
| 1645 | +begin | ||
| 1646 | + Result := Format('TSherpaOnnxOfflineTtsVitsModelConfig(' + | ||
| 1647 | + 'Model := %s, ' + | ||
| 1648 | + 'Lexicon := %s, ' + | ||
| 1649 | + 'Tokens := %s, ' + | ||
| 1650 | + 'DataDir := %s, ' + | ||
| 1651 | + 'NoiseScale := %.2f, ' + | ||
| 1652 | + 'NoiseScaleW := %.2f, ' + | ||
| 1653 | + 'LengthScale := %.2f, ' + | ||
| 1654 | + 'DictDir := %s' + | ||
| 1655 | + ')', | ||
| 1656 | + [Self.Model, Self.Lexicon, Self.Tokens, Self.DataDir, Self.NoiseScale, | ||
| 1657 | + Self.NoiseScaleW, Self.LengthScale, Self.DictDir | ||
| 1658 | + ]); | ||
| 1659 | +end; | ||
| 1660 | + | ||
| 1661 | +class operator TSherpaOnnxOfflineTtsVitsModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsVitsModelConfig); | ||
| 1662 | +begin | ||
| 1663 | + Dest.NoiseScale := 0.667; | ||
| 1664 | + Dest.NoiseScaleW := 0.8; | ||
| 1665 | + Dest.LengthScale := 1.0; | ||
| 1666 | +end; | ||
| 1667 | + | ||
| 1668 | +function TSherpaOnnxOfflineTtsModelConfig.ToString: AnsiString; | ||
| 1669 | +begin | ||
| 1670 | + Result := Format('TSherpaOnnxOfflineTtsModelConfig(' + | ||
| 1671 | + 'Vits := %s, ' + | ||
| 1672 | + 'NumThreads := %d, ' + | ||
| 1673 | + 'Debug := %s, ' + | ||
| 1674 | + 'Provider := %s' + | ||
| 1675 | + ')', | ||
| 1676 | + [Self.Vits.ToString, Self.NumThreads, Self.Debug.ToString, Self.Provider | ||
| 1677 | + ]); | ||
| 1678 | +end; | ||
| 1679 | + | ||
| 1680 | +class operator TSherpaOnnxOfflineTtsModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsModelConfig); | ||
| 1681 | +begin | ||
| 1682 | + Dest.NumThreads := 1; | ||
| 1683 | + Dest.Debug := False; | ||
| 1684 | + Dest.Provider := 'cpu'; | ||
| 1685 | +end; | ||
| 1686 | + | ||
| 1687 | +function TSherpaOnnxOfflineTtsConfig.ToString: AnsiString; | ||
| 1688 | +begin | ||
| 1689 | + Result := Format('TSherpaOnnxOfflineTtsConfig(' + | ||
| 1690 | + 'Model := %s, ' + | ||
| 1691 | + 'RuleFsts := %s, ' + | ||
| 1692 | + 'MaxNumSentences := %d, ' + | ||
| 1693 | + 'RuleFars := %s' + | ||
| 1694 | + ')', | ||
| 1695 | + [Self.Model.ToString, Self.RuleFsts, Self.MaxNumSentences, Self.RuleFars | ||
| 1696 | + ]); | ||
| 1697 | +end; | ||
| 1698 | + | ||
| 1699 | +class operator TSherpaOnnxOfflineTtsConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineTtsConfig); | ||
| 1700 | +begin | ||
| 1701 | + Dest.MaxNumSentences := 1; | ||
| 1702 | +end; | ||
| 1703 | + | ||
| 1704 | +constructor TSherpaOnnxOfflineTts.Create(Config: TSherpaOnnxOfflineTtsConfig); | ||
| 1705 | +var | ||
| 1706 | + C: SherpaOnnxOfflineTtsConfig; | ||
| 1707 | +begin | ||
| 1708 | + C := Default(SherpaOnnxOfflineTtsConfig); | ||
| 1709 | + Self._Config := Config; | ||
| 1710 | + | ||
| 1711 | + C.Model.Vits.Model := PAnsiChar(Config.Model.Vits.Model); | ||
| 1712 | + C.Model.Vits.Lexicon := PAnsiChar(Config.Model.Vits.Lexicon); | ||
| 1713 | + C.Model.Vits.Tokens := PAnsiChar(Config.Model.Vits.Tokens); | ||
| 1714 | + C.Model.Vits.DataDir := PAnsiChar(Config.Model.Vits.DataDir); | ||
| 1715 | + C.Model.Vits.NoiseScale := Config.Model.Vits.NoiseScale; | ||
| 1716 | + C.Model.Vits.NoiseScaleW := Config.Model.Vits.NoiseScaleW; | ||
| 1717 | + C.Model.Vits.LengthScale := Config.Model.Vits.LengthScale; | ||
| 1718 | + C.Model.Vits.DictDir := PAnsiChar(Config.Model.Vits.DictDir); | ||
| 1719 | + | ||
| 1720 | + C.Model.NumThreads := Config.Model.NumThreads; | ||
| 1721 | + C.Model.Provider := PAnsiChar(Config.Model.Provider); | ||
| 1722 | + C.Model.Debug := Ord(Config.Model.Debug); | ||
| 1723 | + | ||
| 1724 | + C.RuleFsts := PAnsiChar(Config.RuleFsts); | ||
| 1725 | + C.MaxNumSentences := Config.MaxNumSentences; | ||
| 1726 | + C.RuleFars := PAnsiChar(Config.RuleFars); | ||
| 1727 | + | ||
| 1728 | + Self.Handle := SherpaOnnxCreateOfflineTts(@C); | ||
| 1729 | + | ||
| 1730 | + Self.SampleRate := SherpaOnnxOfflineTtsSampleRate(Self.Handle); | ||
| 1731 | + Self.NumSpeakers := SherpaOnnxOfflineTtsNumSpeakers(Self.Handle); | ||
| 1732 | +end; | ||
| 1733 | + | ||
| 1734 | +destructor TSherpaOnnxOfflineTts.Destroy; | ||
| 1735 | +begin | ||
| 1736 | + SherpaOnnxDestroyOfflineTts(Self.Handle); | ||
| 1737 | + Self.Handle := nil; | ||
| 1738 | +end; | ||
| 1739 | + | ||
| 1740 | +function TSherpaOnnxOfflineTts.Generate(Text: AnsiString; SpeakerId: Integer; | ||
| 1741 | + Speed: Single): TSherpaOnnxGeneratedAudio; | ||
| 1742 | +var | ||
| 1743 | + Audio: PSherpaOnnxGeneratedAudio; | ||
| 1744 | + I: Integer; | ||
| 1745 | +begin | ||
| 1746 | + Result := Default(TSherpaOnnxGeneratedAudio); | ||
| 1747 | + | ||
| 1748 | + Audio := SherpaOnnxOfflineTtsGenerate(Self.Handle, PAnsiChar(Text), SpeakerId, Speed); | ||
| 1749 | + | ||
| 1750 | + SetLength(Result.Samples, Audio^.N); | ||
| 1751 | + Result.SampleRate := Audio^.SampleRate; | ||
| 1752 | + | ||
| 1753 | + for I := Low(Result.Samples) to High(Result.Samples) do | ||
| 1754 | + begin | ||
| 1755 | + Result.Samples[I] := Audio^.Samples[I]; | ||
| 1756 | + end; | ||
| 1757 | + | ||
| 1758 | + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); | ||
| 1759 | +end; | ||
| 1760 | + | ||
| 1761 | +function TSherpaOnnxOfflineTts.Generate(Text: AnsiString; SpeakerId: Integer; | ||
| 1762 | + Speed: Single; | ||
| 1763 | + Callback:PSherpaOnnxGeneratedAudioCallbackWithArg; | ||
| 1764 | + Arg: Pointer | ||
| 1765 | + ): TSherpaOnnxGeneratedAudio; | ||
| 1766 | +var | ||
| 1767 | + Audio: PSherpaOnnxGeneratedAudio; | ||
| 1768 | + I: Integer; | ||
| 1769 | +begin | ||
| 1770 | + Result := Default(TSherpaOnnxGeneratedAudio); | ||
| 1771 | + | ||
| 1772 | + Audio := SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(Self.Handle, PAnsiChar(Text), | ||
| 1773 | + SpeakerId, Speed, Callback, Arg); | ||
| 1774 | + | ||
| 1775 | + SetLength(Result.Samples, Audio^.N); | ||
| 1776 | + Result.SampleRate := Audio^.SampleRate; | ||
| 1777 | + | ||
| 1778 | + for I := Low(Result.Samples) to High(Result.Samples) do | ||
| 1779 | + begin | ||
| 1780 | + Result.Samples[I] := Audio^.Samples[I]; | ||
| 1781 | + end; | ||
| 1782 | + | ||
| 1783 | + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); | ||
| 1784 | +end; | ||
| 1464 | 1785 | ||
| 1786 | +constructor TSherpaOnnxLinearResampler.Create(SampleRateIn: Integer; SampleRateOut: Integer); | ||
| 1787 | +var | ||
| 1788 | + MinFreq: Single; | ||
| 1789 | + LowpassCutoff: Single; | ||
| 1790 | + LowpassFilterWidth: Integer = 6; | ||
| 1791 | +begin | ||
| 1792 | + if SampleRateIn > SampleRateOut then | ||
| 1793 | + MinFreq := SampleRateOut | ||
| 1794 | + else | ||
| 1795 | + MinFreq := SampleRateIn; | ||
| 1796 | + | ||
| 1797 | + LowpassCutoff := 0.99 * 0.5 * MinFreq; | ||
| 1798 | + | ||
| 1799 | + Self.Handle := SherpaOnnxCreateLinearResampler(SampleRateIn, | ||
| 1800 | + SampleRateOut, LowpassCutoff, LowpassFilterWidth); | ||
| 1801 | + Self.InputSampleRate := SampleRateIn; | ||
| 1802 | + Self.OutputSampleRate := SampleRateOut; | ||
| 1803 | +end; | ||
| 1804 | + | ||
| 1805 | +destructor TSherpaOnnxLinearResampler.Destroy; | ||
| 1806 | +begin | ||
| 1807 | + SherpaOnnxDestroyLinearResampler(Self.Handle); | ||
| 1808 | + Self.Handle := nil; | ||
| 1809 | +end; | ||
| 1810 | + | ||
| 1811 | +function TSherpaOnnxLinearResampler.Resample(Samples: pcfloat; | ||
| 1812 | + N: Integer; Flush: Boolean): TSherpaOnnxSamplesArray; | ||
| 1813 | +var | ||
| 1814 | + P: PSherpaOnnxResampleOut; | ||
| 1815 | + I: Integer; | ||
| 1816 | +begin | ||
| 1817 | + Result := Default(TSherpaOnnxSamplesArray); | ||
| 1818 | + P := SherpaOnnxLinearResamplerResample(Self.Handle, Samples, N, Ord(Flush)); | ||
| 1819 | + SetLength(Result, P^.N); | ||
| 1820 | + | ||
| 1821 | + for I := Low(Result) to High(Result) do | ||
| 1822 | + Result[I] := P^.Samples[I]; | ||
| 1823 | + | ||
| 1824 | + SherpaOnnxLinearResamplerResampleFree(P); | ||
| 1825 | +end; | ||
| 1826 | + | ||
| 1827 | +function TSherpaOnnxLinearResampler.Resample(Samples: array of Single; Flush: Boolean): TSherpaOnnxSamplesArray; | ||
| 1828 | +begin | ||
| 1829 | + Result := Self.Resample(pcfloat(Samples), Length(Samples), Flush); | ||
| 1830 | +end; | ||
| 1831 | + | ||
| 1832 | +procedure TSherpaOnnxLinearResampler.Reset; | ||
| 1833 | +begin | ||
| 1834 | + SherpaOnnxLinearResamplerReset(Self.Handle); | ||
| 1835 | +end; | ||
| 1836 | + | ||
| 1837 | +end. |
-
请 注册 或 登录 后发表评论