Committed by
GitHub
Use a separate thread to initialize models for Lazarus examples. (#1270)
This keeps the main thread unblocked so that the user interface stays responsive.
正在显示
7 个修改的文件
包含
405 行增加
和
301 行删除
| @@ -160,6 +160,10 @@ | @@ -160,6 +160,10 @@ | ||
| 160 | <Filename Value="my_worker.pas"/> | 160 | <Filename Value="my_worker.pas"/> |
| 161 | <IsPartOfProject Value="True"/> | 161 | <IsPartOfProject Value="True"/> |
| 162 | </Unit> | 162 | </Unit> |
| 163 | + <Unit> | ||
| 164 | + <Filename Value="my_init.pas"/> | ||
| 165 | + <IsPartOfProject Value="True"/> | ||
| 166 | + </Unit> | ||
| 163 | </Units> | 167 | </Units> |
| 164 | </ProjectOptions> | 168 | </ProjectOptions> |
| 165 | <CompilerOptions> | 169 | <CompilerOptions> |
| @@ -11,7 +11,7 @@ uses | @@ -11,7 +11,7 @@ uses | ||
| 11 | athreads, | 11 | athreads, |
| 12 | {$ENDIF} | 12 | {$ENDIF} |
| 13 | Interfaces, // this includes the LCL widgetset | 13 | Interfaces, // this includes the LCL widgetset |
| 14 | - Forms, unit1, my_worker | 14 | + Forms, unit1, my_worker, my_init |
| 15 | { you can add units after this }; | 15 | { you can add units after this }; |
| 16 | 16 | ||
| 17 | {$R *.res} | 17 | {$R *.res} |
| 1 | +unit my_init; | ||
| 2 | + | ||
| 3 | +{$mode ObjFPC}{$H+} | ||
| 4 | + | ||
| 5 | +interface | ||
| 6 | + | ||
| 7 | +uses | ||
| 8 | + {$IFDEF UNIX} | ||
| 9 | + cthreads, | ||
| 10 | + cmem, | ||
| 11 | + {$ENDIF} | ||
| 12 | + {$IFDEF HASAMIGA} | ||
| 13 | + athreads, | ||
| 14 | + {$ENDIF} | ||
| 15 | + Classes, SysUtils; | ||
| 16 | + | ||
type
  { Worker thread that performs model initialization for the form.

    Execute builds the model file names from ModelDir, creates the models and
    hands a human-readable outcome to the form through ShowStatus. }
  TMyInitThread = class(TThread)
  private
    { Outcome message that ShowStatus forwards to Form1.UpdateInitStatus. }
    Status: AnsiString;
    { Prefix prepended verbatim to every model file name (no separator is
      added), so it is expected to end with a path delimiter. }
    ModelDir: AnsiString;
    { Passes Status to the form; invoked through Synchronize. }
    procedure ShowStatus;
  protected
    procedure Execute; override;
  public
    constructor Create(CreateSuspended: Boolean; ModelDirectory: AnsiString);
  end;
| 29 | + | ||
| 30 | +var | ||
| 31 | + MyInitThread: TMyInitThread; | ||
| 32 | + | ||
| 33 | +implementation | ||
| 34 | + | ||
| 35 | +uses | ||
| 36 | + unit1, sherpa_onnx; | ||
| 37 | + | ||
{ Creates a Silero voice activity detector backed by the model file
  VadFilename.

  The configuration is fixed: 16000 Hz input, a 512-sample window, 0.5 for the
  threshold and for both minimum durations, two CPU threads, debug output on.
  The second constructor argument (30) is passed through unchanged
  (NOTE(review): presumably the detector buffer size in seconds - confirm
  against the sherpa_onnx unit). }
function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
const
  { Please don't change these unless you know the details }
  ExpectedSampleRate = 16000;
  SamplesPerWindow = 512;
var
  VadConfig: TSherpaOnnxVadModelConfig;
begin
  Initialize(VadConfig);

  { General runtime settings }
  VadConfig.SampleRate := ExpectedSampleRate;
  VadConfig.NumThreads := 2;
  VadConfig.Provider := 'cpu';
  VadConfig.Debug := True;

  { Silero-specific settings }
  VadConfig.SileroVad.Model := VadFilename;
  VadConfig.SileroVad.WindowSize := SamplesPerWindow;
  VadConfig.SileroVad.Threshold := 0.5;
  VadConfig.SileroVad.MinSpeechDuration := 0.5;
  VadConfig.SileroVad.MinSilenceDuration := 0.5;

  Result := TSherpaOnnxVoiceActivityDetector.Create(VadConfig, 30);
end;
| 62 | + | ||
{ Creates an offline (non-streaming) recognizer for a transducer model.

  Tokens is the tokens file; Encoder, Decoder and Joiner are the three model
  files; ModelType is stored as-is in the model configuration (the callers in
  this unit pass 'transducer' or 'nemo_transducer').
  The recognizer runs on the CPU with two threads and debug output off. }
function CreateOfflineRecognizerTransducer(
  Tokens: AnsiString;
  Encoder: AnsiString;
  Decoder: AnsiString;
  Joiner: AnsiString;
  ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model family }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.ModelType := ModelType;
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.Debug := False;

  { Transducer model files }
  RecognizerConfig.ModelConfig.Transducer.Joiner := Joiner;
  RecognizerConfig.ModelConfig.Transducer.Decoder := Decoder;
  RecognizerConfig.ModelConfig.Transducer.Encoder := Encoder;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
| 86 | + | ||
{ Creates an offline (non-streaming) recognizer for a TeleSpeech CTC model.

  Tokens is the tokens file and TeleSpeech is the model file.
  The recognizer runs on the CPU with two threads and debug output off. }
function CreateOfflineRecognizerTeleSpeech(
  Tokens: AnsiString;
  TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model family }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.Debug := False;

  { TeleSpeech model file }
  RecognizerConfig.ModelConfig.TeleSpeechCtc := TeleSpeech;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
| 104 | + | ||
{ Creates an offline (non-streaming) recognizer for a Paraformer model.

  Tokens is the tokens file and Paraformer is the model file.
  The recognizer runs on the CPU with two threads and debug output off. }
function CreateOfflineRecognizerParaformer(
  Tokens: AnsiString;
  Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model family }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.Debug := False;

  { Paraformer model file }
  RecognizerConfig.ModelConfig.Paraformer.Model := Paraformer;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
| 122 | + | ||
{ Creates an offline (non-streaming) recognizer for a SenseVoice model.

  Tokens is the tokens file and SenseVoice is the model file. The language is
  set to 'auto' and UseItn is switched on.
  The recognizer runs on the CPU with two threads and debug output off. }
function CreateOfflineRecognizerSenseVoice(
  Tokens: AnsiString;
  SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model family }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.Debug := False;

  { SenseVoice-specific settings }
  RecognizerConfig.ModelConfig.SenseVoice.Model := SenseVoice;
  RecognizerConfig.ModelConfig.SenseVoice.UseItn := True;
  RecognizerConfig.ModelConfig.SenseVoice.Language := 'auto';

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
| 141 | + | ||
{ Creates an offline (non-streaming) recognizer for a Whisper model.

  Tokens is the tokens file; WhisperEncoder and WhisperDecoder are the two
  model files.
  The recognizer runs on the CPU with two threads and debug output off. }
function CreateOfflineRecognizerWhisper(
  Tokens: AnsiString;
  WhisperEncoder: AnsiString;
  WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
var
  RecognizerConfig: TSherpaOnnxOfflineRecognizerConfig;
begin
  Initialize(RecognizerConfig);

  { Settings shared by every model family }
  RecognizerConfig.ModelConfig.Tokens := Tokens;
  RecognizerConfig.ModelConfig.NumThreads := 2;
  RecognizerConfig.ModelConfig.Provider := 'cpu';
  RecognizerConfig.ModelConfig.Debug := False;

  { Whisper model files }
  RecognizerConfig.ModelConfig.Whisper.Decoder := WhisperDecoder;
  RecognizerConfig.ModelConfig.Whisper.Encoder := WhisperEncoder;

  Result := TSherpaOnnxOfflineRecognizer.Create(RecognizerConfig);
end;
| 160 | + | ||
{ Creates the initialization thread.

  CreateSuspended is forwarded to TThread.Create. ModelDirectory is stored and
  later used by Execute as the prefix of every model file name.
  FreeOnTerminate is enabled, so the thread object is released automatically
  once it finishes. }
constructor TMyInitThread.Create(CreateSuspended: Boolean; ModelDirectory: AnsiString);
begin
  inherited Create(CreateSuspended);
  FreeOnTerminate := True;
  ModelDir := ModelDirectory;
end;
| 167 | + | ||
{ Forwards the current Status text to the form.
  Execute always invokes this through Synchronize rather than directly. }
procedure TMyInitThread.ShowStatus;
begin
  Form1.UpdateInitStatus(Self.Status);
end;
| 172 | + | ||
{ Thread entry point: initializes the VAD and one offline recognizer.

  All model file names are built by appending a fixed file name to ModelDir.
  The VAD is created first (only if Form1.Vad is still nil), then the first
  recognizer family whose model files exist is created, in this order:
  Whisper, SenseVoice, Paraformer, TeleSpeech, Zipformer transducer,
  NeMo transducer.

  The outcome is always reported exactly once through Synchronize(@ShowStatus).
  An exception raised while loading a model is caught and reported as a
  failure, so the form never stays stuck in the "initializing" state. }
procedure TMyInitThread.Execute;

  { Performs the actual initialization and leaves the outcome text in Status.
    Every path through this procedure assigns Status before returning. }
  procedure LoadModels;
  var
    Msg: AnsiString;
    VadFilename: AnsiString;
    Tokens: AnsiString;

    WhisperEncoder: AnsiString;
    WhisperDecoder: AnsiString;

    SenseVoice: AnsiString;

    Paraformer: AnsiString;

    TeleSpeech: AnsiString;

    TransducerEncoder: AnsiString; // from icefall
    TransducerDecoder: AnsiString;
    TransducerJoiner: AnsiString;

    NeMoTransducerEncoder: AnsiString;
    NeMoTransducerDecoder: AnsiString;
    NeMoTransducerJoiner: AnsiString;
  begin
    VadFilename := ModelDir + 'silero_vad.onnx';
    Tokens := ModelDir + 'tokens.txt';

    {
      Please refer to
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
      for a list of whisper models.

      In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt
      You need to rename the existing model files.

      For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
        mv tiny.en-tokens.txt tokens.txt

        mv tiny.en-encoder.onnx whisper-encoder.onnx
        mv tiny.en-decoder.onnx whisper-decoder.onnx

        // or use the int8.onnx

        mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
        mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
    }
    WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
    WhisperDecoder := ModelDir + 'whisper-decoder.onnx';

    {
      Please refer to
      https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
      to download models for SenseVoice.

      In the code, we use the normalized model name sense-voice.onnx. You have
      to rename the downloaded model files.

      For example, you need to use

        mv model.onnx sense-voice.onnx

        // or use the int8.onnx
        mv model.int8.onnx sense-voice.onnx
    }
    SenseVoice := ModelDir + 'sense-voice.onnx';

    {
      Please refer to
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
      to download paraformer models.

      Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
      An example is given below for the rename:

        cp model.onnx paraformer.onnx

        // or use int8.onnx
        cp model.int8.onnx paraformer.onnx
    }
    Paraformer := ModelDir + 'paraformer.onnx';

    {
      Please refer to
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
      to download TeleSpeech models.

      Note that you have to rename model files after downloading. The following
      is an example

        mv model.onnx telespeech.onnx

        // or to use int8.onnx

        mv model.int8.onnx telespeech.onnx
    }
    TeleSpeech := ModelDir + 'telespeech.onnx';

    {
      Please refer to
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
      to download an icefall offline transducer model. Note that you need to rename the
      model files to transducer-encoder.onnx, transducer-decoder.onnx, and
      transducer-joiner.onnx
    }
    TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
    TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
    TransducerJoiner := ModelDir + 'transducer-joiner.onnx';

    {
      Please visit
      https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
      to download a NeMo transducer model.
    }
    NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
    NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
    NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';

    if not FileExists(VadFilename) then
    begin
      Status := VadFilename + ' does not exist! Please download it from' +
        sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models';
      Exit;
    end;

    { Keep an already created VAD: a previous attempt may have created it
      before failing on the recognizer files. }
    if Form1.Vad = nil then
    begin
      Form1.Vad := CreateVad(VadFilename);
    end;

    if not FileExists(Tokens) then
    begin
      Status := Tokens + ' not found. Please download a non-streaming ASR model first!';
      Exit;
    end;

    if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
      Msg := 'Whisper';
    end
    else if FileExists(SenseVoice) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
      Msg := 'SenseVoice';
    end
    else if FileExists(Paraformer) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
      Msg := 'Paraformer';
    end
    else if FileExists(TeleSpeech) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
      Msg := 'TeleSpeech';
    end
    else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
        TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
      Msg := 'Zipformer transducer';
    end
    else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
    begin
      Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
        NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
      Msg := 'NeMo transducer';
    end
    else
    begin
      Status := 'Please download at least one non-streaming speech recognition model first.';
      Exit;
    end;

    { Do not "fix" the spelling of this message on its own:
      TForm1.UpdateInitStatus recognizes success by checking that the text
      ends with 'model is initialized succesfully!'. }
    Status := 'Congratulations! The ' + Msg + ' model is initialized succesfully!';
  end;

begin
  try
    LoadModels;
  except
    on E: Exception do
      { The text ends with ')' so it can never be mistaken for the success
        message that the form matches on. }
      Status := 'Model initialization failed (' + E.Message + ')';
  end;

  { Single reporting point: the form is updated on the main thread. }
  Synchronize(@ShowStatus);
end;
| 356 | + | ||
| 357 | +end. | ||
| 358 | + |
| @@ -41,6 +41,7 @@ type | @@ -41,6 +41,7 @@ type | ||
| 41 | StopTime: Single; | 41 | StopTime: Single; |
| 42 | TotalDuration: Single); | 42 | TotalDuration: Single); |
| 43 | procedure UpdateProgress(StopTime: Single; TotalDuration: Single); | 43 | procedure UpdateProgress(StopTime: Single; TotalDuration: Single); |
| 44 | + procedure UpdateInitStatus(Status: AnsiString); | ||
| 44 | public | 45 | public |
| 45 | Vad: TSherpaOnnxVoiceActivityDetector; | 46 | Vad: TSherpaOnnxVoiceActivityDetector; |
| 46 | OfflineRecognizer: TSherpaOnnxOfflineRecognizer; | 47 | OfflineRecognizer: TSherpaOnnxOfflineRecognizer; |
| @@ -52,7 +53,8 @@ var | @@ -52,7 +53,8 @@ var | ||
| 52 | implementation | 53 | implementation |
| 53 | 54 | ||
| 54 | uses | 55 | uses |
| 55 | - my_worker | 56 | + my_worker, |
| 57 | + my_init | ||
| 56 | {$IFDEF DARWIN} | 58 | {$IFDEF DARWIN} |
| 57 | ,MacOSAll | 59 | ,MacOSAll |
| 58 | ,CocoaAll | 60 | ,CocoaAll |
| @@ -76,128 +78,7 @@ begin | @@ -76,128 +78,7 @@ begin | ||
| 76 | end; | 78 | end; |
| 77 | {$ENDIF} | 79 | {$ENDIF} |
| 78 | 80 | ||
| 79 | -function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector; | ||
| 80 | -var | ||
| 81 | - Config: TSherpaOnnxVadModelConfig; | ||
| 82 | - | ||
| 83 | - SampleRate: Integer; | ||
| 84 | - WindowSize: Integer; | ||
| 85 | -begin | ||
| 86 | - Initialize(Config); | ||
| 87 | - | ||
| 88 | - SampleRate := 16000; {Please don't change it unless you know the details} | ||
| 89 | - WindowSize := 512; {Please don't change it unless you know the details} | ||
| 90 | - | ||
| 91 | - Config.SileroVad.Model := VadFilename; | ||
| 92 | - Config.SileroVad.MinSpeechDuration := 0.5; | ||
| 93 | - Config.SileroVad.MinSilenceDuration := 0.5; | ||
| 94 | - Config.SileroVad.Threshold := 0.5; | ||
| 95 | - Config.SileroVad.WindowSize := WindowSize; | ||
| 96 | - Config.NumThreads:= 2; | ||
| 97 | - Config.Debug:= True; | ||
| 98 | - Config.Provider:= 'cpu'; | ||
| 99 | - Config.SampleRate := SampleRate; | ||
| 100 | - | ||
| 101 | - Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30); | ||
| 102 | -end; | ||
| 103 | - | ||
| 104 | -function CreateOfflineRecognizerTransducer( | ||
| 105 | - Tokens: AnsiString; | ||
| 106 | - Encoder: AnsiString; | ||
| 107 | - Decoder: AnsiString; | ||
| 108 | - Joiner: AnsiString; | ||
| 109 | - ModelType: AnsiString): TSherpaOnnxOfflineRecognizer; | ||
| 110 | -var | ||
| 111 | - Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 112 | -begin | ||
| 113 | - Initialize(Config); | ||
| 114 | - | ||
| 115 | - Config.ModelConfig.Transducer.Encoder := Encoder; | ||
| 116 | - Config.ModelConfig.Transducer.Decoder := Decoder; | ||
| 117 | - Config.ModelConfig.Transducer.Joiner := Joiner; | ||
| 118 | - | ||
| 119 | - Config.ModelConfig.ModelType := ModelType; | ||
| 120 | - Config.ModelConfig.Tokens := Tokens; | ||
| 121 | - Config.ModelConfig.Provider := 'cpu'; | ||
| 122 | - Config.ModelConfig.NumThreads := 2; | ||
| 123 | - Config.ModelConfig.Debug := False; | ||
| 124 | - | ||
| 125 | - Result := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 126 | -end; | ||
| 127 | - | ||
| 128 | -function CreateOfflineRecognizerTeleSpeech( | ||
| 129 | - Tokens: AnsiString; | ||
| 130 | - TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer; | ||
| 131 | -var | ||
| 132 | - Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 133 | -begin | ||
| 134 | - Initialize(Config); | ||
| 135 | - | ||
| 136 | - Config.ModelConfig.TeleSpeechCtc := TeleSpeech; | ||
| 137 | - | ||
| 138 | - Config.ModelConfig.Tokens := Tokens; | ||
| 139 | - Config.ModelConfig.Provider := 'cpu'; | ||
| 140 | - Config.ModelConfig.NumThreads := 2; | ||
| 141 | - Config.ModelConfig.Debug := False; | ||
| 142 | - | ||
| 143 | - Result := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 144 | -end; | ||
| 145 | - | ||
| 146 | -function CreateOfflineRecognizerParaformer( | ||
| 147 | - Tokens: AnsiString; | ||
| 148 | - Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer; | ||
| 149 | -var | ||
| 150 | - Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 151 | -begin | ||
| 152 | - Initialize(Config); | ||
| 153 | - | ||
| 154 | - Config.ModelConfig.Paraformer.Model := Paraformer; | ||
| 155 | - | ||
| 156 | - Config.ModelConfig.Tokens := Tokens; | ||
| 157 | - Config.ModelConfig.Provider := 'cpu'; | ||
| 158 | - Config.ModelConfig.NumThreads := 2; | ||
| 159 | - Config.ModelConfig.Debug := False; | ||
| 160 | - | ||
| 161 | - Result := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 162 | -end; | ||
| 163 | - | ||
| 164 | -function CreateOfflineRecognizerSenseVoice( | ||
| 165 | - Tokens: AnsiString; | ||
| 166 | - SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer; | ||
| 167 | -var | ||
| 168 | - Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 169 | -begin | ||
| 170 | - Initialize(Config); | ||
| 171 | 81 | ||
| 172 | - Config.ModelConfig.SenseVoice.Model := SenseVoice; | ||
| 173 | - Config.ModelConfig.SenseVoice.Language := 'auto'; | ||
| 174 | - Config.ModelConfig.SenseVoice.UseItn := True; | ||
| 175 | - Config.ModelConfig.Tokens := Tokens; | ||
| 176 | - Config.ModelConfig.Provider := 'cpu'; | ||
| 177 | - Config.ModelConfig.NumThreads := 2; | ||
| 178 | - Config.ModelConfig.Debug := False; | ||
| 179 | - | ||
| 180 | - Result := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 181 | -end; | ||
| 182 | - | ||
| 183 | -function CreateOfflineRecognizerWhisper( | ||
| 184 | - Tokens: AnsiString; | ||
| 185 | - WhisperEncoder: AnsiString; | ||
| 186 | - WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer; | ||
| 187 | -var | ||
| 188 | - Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 189 | -begin | ||
| 190 | - Initialize(Config); | ||
| 191 | - | ||
| 192 | - Config.ModelConfig.Whisper.Encoder := WhisperEncoder; | ||
| 193 | - Config.ModelConfig.Whisper.Decoder := WhisperDecoder; | ||
| 194 | - Config.ModelConfig.Tokens := Tokens; | ||
| 195 | - Config.ModelConfig.Provider := 'cpu'; | ||
| 196 | - Config.ModelConfig.NumThreads := 2; | ||
| 197 | - Config.ModelConfig.Debug := False; | ||
| 198 | - | ||
| 199 | - Result := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 200 | -end; | ||
| 201 | 82 | ||
| 202 | {$R *.lfm} | 83 | {$R *.lfm} |
| 203 | 84 | ||
| @@ -256,7 +137,7 @@ end; | @@ -256,7 +137,7 @@ end; | ||
| 256 | 137 | ||
| 257 | procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction); | 138 | procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction); |
| 258 | begin | 139 | begin |
| 259 | - if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then | 140 | + if (MyWorkerThread <> nil) and (not MyWorkerThread.Finished) then |
| 260 | begin | 141 | begin |
| 261 | MyWorkerThread.Terminate; | 142 | MyWorkerThread.Terminate; |
| 262 | MyWorkerThread.WaitFor; | 143 | MyWorkerThread.WaitFor; |
| @@ -310,29 +191,35 @@ begin | @@ -310,29 +191,35 @@ begin | ||
| 310 | Form1.ResultMemo.Lines.Add(NewResult); | 191 | Form1.ResultMemo.Lines.Add(NewResult); |
| 311 | end; | 192 | end; |
| 312 | 193 | ||
| 313 | -procedure TForm1.InitBtnClick(Sender: TObject); | ||
| 314 | -var | ||
| 315 | - Msg: AnsiString; | ||
| 316 | - ModelDir: AnsiString; | ||
| 317 | - VadFilename: AnsiString; | ||
| 318 | - Tokens: AnsiString; | ||
| 319 | - | ||
| 320 | - WhisperEncoder: AnsiString; | ||
| 321 | - WhisperDecoder: AnsiString; | ||
| 322 | - | ||
| 323 | - SenseVoice: AnsiString; | ||
| 324 | - | ||
| 325 | - Paraformer: AnsiString; | 194 | +procedure TForm1.UpdateInitStatus(Status: AnsiString); |
| 195 | +begin | ||
| 196 | + if EndsStr('model is initialized succesfully!', Status) then | ||
| 197 | + begin | ||
| 198 | + Form1.ResultMemo.Lines.Add(Status); | ||
| 199 | + Form1.ResultMemo.Lines.Add('Please select a 16000Hz wave file to generate subtiles'); | ||
| 200 | + Form1.ResultMemo.Lines.Add('You can download some test wave files from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models'); | ||
| 201 | + Form1.ResultMemo.Lines.Add('For instance:'); | ||
| 202 | + Form1.ResultMemo.Lines.Add(' Chinese test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav'); | ||
| 203 | + Form1.ResultMemo.Lines.Add(' English test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav'); | ||
| 204 | + FileNameEdt.Enabled := True; | ||
| 205 | + SelectFileBtn.Enabled := True; | ||
| 326 | 206 | ||
| 327 | - TeleSpeech: AnsiString; | 207 | + end |
| 208 | + else | ||
| 209 | + begin | ||
| 210 | + ShowMessage(Status); | ||
| 211 | + Form1.ResultMemo.Lines.Clear(); | ||
| 212 | + Form1.ResultMemo.Lines.Add('Please refer to'); | ||
| 213 | + Form1.ResultMemo.Lines.Add('https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-models'); | ||
| 214 | + Form1.ResultMemo.Lines.Add('for how to download models'); | ||
| 328 | 215 | ||
| 329 | - TransducerEncoder: AnsiString; // from icefall | ||
| 330 | - TransducerDecoder: AnsiString; | ||
| 331 | - TransducerJoiner: AnsiString; | 216 | + InitBtn.Enabled := True; |
| 217 | + end; | ||
| 218 | +end; | ||
| 332 | 219 | ||
| 333 | - NeMoTransducerEncoder: AnsiString; | ||
| 334 | - NeMoTransducerDecoder: AnsiString; | ||
| 335 | - NeMoTransducerJoiner: AnsiString; | 220 | +procedure TForm1.InitBtnClick(Sender: TObject); |
| 221 | +var | ||
| 222 | + ModelDir: AnsiString; | ||
| 336 | begin | 223 | begin |
| 337 | {$IFDEF DARWIN} | 224 | {$IFDEF DARWIN} |
| 338 | ModelDir := GetResourcesPath; | 225 | ModelDir := GetResourcesPath; |
| @@ -340,162 +227,10 @@ begin | @@ -340,162 +227,10 @@ begin | ||
| 340 | ModelDir := './'; | 227 | ModelDir := './'; |
| 341 | {$ENDIF} | 228 | {$ENDIF} |
| 342 | 229 | ||
| 343 | - VadFilename := ModelDir + 'silero_vad.onnx'; | ||
| 344 | - Tokens := ModelDir + 'tokens.txt'; | ||
| 345 | - | ||
| 346 | - { | ||
| 347 | - Please refer to | ||
| 348 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models | ||
| 349 | - for a list of whisper models. | ||
| 350 | - | ||
| 351 | - In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt | ||
| 352 | - You need to rename the existing model files. | ||
| 353 | - | ||
| 354 | - For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do | ||
| 355 | - mv tiny.en-tokens.txt tokens.txt | ||
| 356 | - | ||
| 357 | - mv tiny.en-encoder.onnx whisper-encoder.onnx | ||
| 358 | - mv tiny.en-decoder.onnx whisper-decoder.onnx | ||
| 359 | - | ||
| 360 | - // or use the int8.onnx | ||
| 361 | - | ||
| 362 | - mv tiny.en-encoder.int8.onnx whisper-encoder.onnx | ||
| 363 | - mv tiny.en-decoder.int8.onnx whisper-decoder.onnx | ||
| 364 | - } | ||
| 365 | - WhisperEncoder := ModelDir + 'whisper-encoder.onnx'; | ||
| 366 | - WhisperDecoder := ModelDir + 'whisper-decoder.onnx'; | ||
| 367 | - | ||
| 368 | - | ||
| 369 | - { | ||
| 370 | - Please refer to | ||
| 371 | - https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models | ||
| 372 | - to download models for SenseVoice. | ||
| 373 | - | ||
| 374 | - In the code, we use the normalized model name sense-voice.onnx. You have | ||
| 375 | - to rename the downloaded model files. | ||
| 376 | - | ||
| 377 | - For example, you need to use | ||
| 378 | - | ||
| 379 | - mv model.onnx sense-voice.onnx | ||
| 380 | - | ||
| 381 | - // or use the int8.onnx | ||
| 382 | - mv model.int8.onnx sense-voice.onnx | ||
| 383 | - } | ||
| 384 | - | ||
| 385 | - SenseVoice := ModelDir + 'sense-voice.onnx'; | ||
| 386 | - | ||
| 387 | - { | ||
| 388 | - Please refer to | ||
| 389 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html | ||
| 390 | - to download paraformer models. | ||
| 391 | - | ||
| 392 | - Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx. | ||
| 393 | - An example is given below for the rename: | ||
| 394 | - | ||
| 395 | - cp model.onnx paraformer.onnx | ||
| 396 | - | ||
| 397 | - // or use int8.onnx | ||
| 398 | - cp model.int8.onnx paraformer.onnx | ||
| 399 | - } | ||
| 400 | - Paraformer := ModelDir + 'paraformer.onnx'; | ||
| 401 | - | ||
| 402 | - | ||
| 403 | - { | ||
| 404 | - please refer to | ||
| 405 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html | ||
| 406 | - to download TeleSpeech models. | ||
| 407 | - | ||
| 408 | - Note that you have to rename model files after downloading. The following | ||
| 409 | - is an example | ||
| 410 | - | ||
| 411 | - mv model.onnx telespeech.onnx | ||
| 412 | - | ||
| 413 | - // or to use int8.onnx | ||
| 414 | - | ||
| 415 | - mv model.int8.onnx telespeech.onnx | ||
| 416 | - } | ||
| 417 | - | ||
| 418 | - TeleSpeech := ModelDir + 'telespeech.onnx'; | ||
| 419 | - | ||
| 420 | - | ||
| 421 | - { | ||
| 422 | - Please refer to | ||
| 423 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 424 | - to download an icefall offline transducer model. Note that you need to rename the | ||
| 425 | - model files to transducer-encoder.onnx, transducer-decoder.onnx, and | ||
| 426 | - transducer-joiner.onnx | ||
| 427 | - } | ||
| 428 | - TransducerEncoder := ModelDir + 'transducer-encoder.onnx'; | ||
| 429 | - TransducerDecoder := ModelDir + 'transducer-decoder.onnx'; | ||
| 430 | - TransducerJoiner := ModelDir + 'transducer-joiner.onnx'; | ||
| 431 | - | ||
| 432 | - { | ||
| 433 | - Please visit | ||
| 434 | - https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 435 | - to donwload a NeMo transducer model. | ||
| 436 | - } | ||
| 437 | - NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx'; | ||
| 438 | - NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx'; | ||
| 439 | - NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx'; | ||
| 440 | - | ||
| 441 | - if not FileExists(VadFilename) then | ||
| 442 | - begin | ||
| 443 | - ShowMessage(VadFilename + ' does not exist! Please download it from' + | ||
| 444 | - sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models' | ||
| 445 | - ); | ||
| 446 | - Exit; | ||
| 447 | - end; | ||
| 448 | - | ||
| 449 | - Self.Vad := CreateVad(VadFilename); | ||
| 450 | - | ||
| 451 | - if not FileExists(Tokens) then | ||
| 452 | - begin | ||
| 453 | - ShowMessage(Tokens + ' not found. Please download a non-streaming ASR model first!'); | ||
| 454 | - Exit; | ||
| 455 | - end; | ||
| 456 | - | ||
| 457 | - if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then | ||
| 458 | - begin | ||
| 459 | - OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder); | ||
| 460 | - Msg := 'Whisper'; | ||
| 461 | - end | ||
| 462 | - else if FileExists(SenseVoice) then | ||
| 463 | - begin | ||
| 464 | - OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice); | ||
| 465 | - Msg := 'SenseVoice'; | ||
| 466 | - end | ||
| 467 | - else if FileExists(Paraformer) then | ||
| 468 | - begin | ||
| 469 | - OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer); | ||
| 470 | - Msg := 'Paraformer'; | ||
| 471 | - end | ||
| 472 | - else if FileExists(TeleSpeech) then | ||
| 473 | - begin | ||
| 474 | - OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech); | ||
| 475 | - Msg := 'TeleSpeech'; | ||
| 476 | - end | ||
| 477 | - else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then | ||
| 478 | - begin | ||
| 479 | - OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, | ||
| 480 | - TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer'); | ||
| 481 | - Msg := 'Zipformer transducer'; | ||
| 482 | - end | ||
| 483 | - else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then | ||
| 484 | - begin | ||
| 485 | - OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens, | ||
| 486 | - NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer'); | ||
| 487 | - Msg := 'NeMo transducer'; | ||
| 488 | - end | ||
| 489 | - else | ||
| 490 | - begin | ||
| 491 | - ShowMessage('Please download at least one non-streaming speech recognition model first.'); | ||
| 492 | - Exit; | ||
| 493 | - end; | ||
| 494 | - | ||
| 495 | - MessageDlg('Congrat! The ' + Msg + ' model is initialized succesfully!', mtInformation, [mbOk], 0); | ||
| 496 | - FileNameEdt.Enabled := True; | ||
| 497 | - SelectFileBtn.Enabled := True; | ||
| 498 | - InitBtn.Enabled := False; | 230 | + Form1.ResultMemo.Lines.Clear(); |
| 231 | + ResultMemo.Lines.Add('Initializing the model. Please wait...'); | ||
| 232 | + MyInitThread := TMyInitThread.Create(False, ModelDir); | ||
| 233 | + InitBtn.Enabled := False; | ||
| 499 | end; | 234 | end; |
| 500 | 235 | ||
| 501 | end. | 236 | end. |
| @@ -69,7 +69,7 @@ bool SileroVadModelConfig::Validate() const { | @@ -69,7 +69,7 @@ bool SileroVadModelConfig::Validate() const { | ||
| 69 | std::string SileroVadModelConfig::ToString() const { | 69 | std::string SileroVadModelConfig::ToString() const { |
| 70 | std::ostringstream os; | 70 | std::ostringstream os; |
| 71 | 71 | ||
| 72 | - os << "SilerVadModelConfig("; | 72 | + os << "SileroVadModelConfig("; |
| 73 | os << "model=\"" << model << "\", "; | 73 | os << "model=\"" << model << "\", "; |
| 74 | os << "threshold=" << threshold << ", "; | 74 | os << "threshold=" << threshold << ", "; |
| 75 | os << "min_silence_duration=" << min_silence_duration << ", "; | 75 | os << "min_silence_duration=" << min_silence_duration << ", "; |
| @@ -98,6 +98,7 @@ type | @@ -98,6 +98,7 @@ type | ||
| 98 | destructor Destroy; override; | 98 | destructor Destroy; override; |
| 99 | procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); | 99 | procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); |
| 100 | procedure InputFinished; | 100 | procedure InputFinished; |
| 101 | + property GetHandle: Pointer Read Handle; | ||
| 101 | end; | 102 | end; |
| 102 | 103 | ||
| 103 | TSherpaOnnxOnlineRecognizer = class | 104 | TSherpaOnnxOnlineRecognizer = class |
| @@ -116,6 +117,7 @@ type | @@ -116,6 +117,7 @@ type | ||
| 116 | function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean; | 117 | function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean; |
| 117 | function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; | 118 | function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; |
| 118 | property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config; | 119 | property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config; |
| 120 | + property GetHandle: Pointer Read Handle; | ||
| 119 | end; | 121 | end; |
| 120 | 122 | ||
| 121 | TSherpaOnnxOfflineTransducerModelConfig = record | 123 | TSherpaOnnxOfflineTransducerModelConfig = record |
| @@ -213,6 +215,7 @@ type | @@ -213,6 +215,7 @@ type | ||
| 213 | constructor Create(P: Pointer); | 215 | constructor Create(P: Pointer); |
| 214 | destructor Destroy; override; | 216 | destructor Destroy; override; |
| 215 | procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); | 217 | procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); |
| 218 | + property GetHandle: Pointer Read Handle; | ||
| 216 | end; | 219 | end; |
| 217 | 220 | ||
| 218 | TSherpaOnnxOfflineRecognizer = class | 221 | TSherpaOnnxOfflineRecognizer = class |
| @@ -226,6 +229,7 @@ type | @@ -226,6 +229,7 @@ type | ||
| 226 | procedure Decode(Stream: TSherpaOnnxOfflineStream); | 229 | procedure Decode(Stream: TSherpaOnnxOfflineStream); |
| 227 | function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; | 230 | function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; |
| 228 | property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; | 231 | property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; |
| 232 | + property GetHandle: Pointer Read Handle; | ||
| 229 | end; | 233 | end; |
| 230 | 234 | ||
| 231 | TSherpaOnnxSileroVadModelConfig = record | 235 | TSherpaOnnxSileroVadModelConfig = record |
| @@ -262,6 +266,7 @@ type | @@ -262,6 +266,7 @@ type | ||
| 262 | procedure Reset; | 266 | procedure Reset; |
| 263 | function Size: Integer; | 267 | function Size: Integer; |
| 264 | function Head: Integer; | 268 | function Head: Integer; |
| 269 | + property GetHandle: Pointer Read Handle; | ||
| 265 | end; | 270 | end; |
| 266 | 271 | ||
| 267 | TSherpaOnnxSpeechSegment = record | 272 | TSherpaOnnxSpeechSegment = record |
| @@ -286,6 +291,7 @@ type | @@ -286,6 +291,7 @@ type | ||
| 286 | procedure Reset; | 291 | procedure Reset; |
| 287 | procedure Flush; | 292 | procedure Flush; |
| 288 | property Config: TSherpaOnnxVadModelConfig Read _Config; | 293 | property Config: TSherpaOnnxVadModelConfig Read _Config; |
| 294 | + property GetHandle: Pointer Read Handle; | ||
| 289 | end; | 295 | end; |
| 290 | 296 | ||
| 291 | { It supports reading a single channel wave with 16-bit encoded samples. | 297 | { It supports reading a single channel wave with 16-bit encoded samples. |
-
请 注册 或 登录 后发表评论