Fangjun Kuang
Committed by GitHub

Use a separate thread to initialize models for lazarus examples. (#1270)

So that the main thread is not blocked and the user interface is responsive.
@@ -5,6 +5,7 @@ on: @@ -5,6 +5,7 @@ on:
5 branches: 5 branches:
6 - master 6 - master
7 - lazarus 7 - lazarus
  8 + - fix-lazarus
8 paths: 9 paths:
9 - '.github/workflows/lazarus.yaml' 10 - '.github/workflows/lazarus.yaml'
10 - 'CMakeLists.txt' 11 - 'CMakeLists.txt'
@@ -160,6 +160,10 @@ @@ -160,6 +160,10 @@
160 <Filename Value="my_worker.pas"/> 160 <Filename Value="my_worker.pas"/>
161 <IsPartOfProject Value="True"/> 161 <IsPartOfProject Value="True"/>
162 </Unit> 162 </Unit>
  163 + <Unit>
  164 + <Filename Value="my_init.pas"/>
  165 + <IsPartOfProject Value="True"/>
  166 + </Unit>
163 </Units> 167 </Units>
164 </ProjectOptions> 168 </ProjectOptions>
165 <CompilerOptions> 169 <CompilerOptions>
@@ -11,7 +11,7 @@ uses @@ -11,7 +11,7 @@ uses
11 athreads, 11 athreads,
12 {$ENDIF} 12 {$ENDIF}
13 Interfaces, // this includes the LCL widgetset 13 Interfaces, // this includes the LCL widgetset
14 - Forms, unit1, my_worker 14 + Forms, unit1, my_worker, my_init
15 { you can add units after this }; 15 { you can add units after this };
16 16
17 {$R *.res} 17 {$R *.res}
  1 +unit my_init;
  2 +
  3 +{$mode ObjFPC}{$H+}
  4 +
  5 +interface
  6 +
  7 +uses
  8 + {$IFDEF UNIX}
  9 + cthreads,
  10 + cmem,
  11 + {$ENDIF}
  12 + {$IFDEF HASAMIGA}
  13 + athreads,
  14 + {$ENDIF}
  15 + Classes, SysUtils;
  16 +
  17 +type
  18 + TMyInitThread = class(TThread)
  19 + private
  20 + Status: AnsiString;
  21 + ModelDir: AnsiString;
  22 + procedure ShowStatus;
  23 +
  24 + protected
  25 + procedure Execute; override;
  26 + public
  27 + Constructor Create(CreateSuspended: Boolean; ModelDirectory: AnsiString);
  28 + end;
  29 +
  30 +var
  31 + MyInitThread: TMyInitThread;
  32 +
  33 +implementation
  34 +
  35 +uses
  36 + unit1, sherpa_onnx;
  37 +
  38 +function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
  39 +var
  40 + Config: TSherpaOnnxVadModelConfig;
  41 +
  42 + SampleRate: Integer;
  43 + WindowSize: Integer;
  44 +begin
  45 + Initialize(Config);
  46 +
  47 + SampleRate := 16000; {Please don't change it unless you know the details}
  48 + WindowSize := 512; {Please don't change it unless you know the details}
  49 +
  50 + Config.SileroVad.Model := VadFilename;
  51 + Config.SileroVad.MinSpeechDuration := 0.5;
  52 + Config.SileroVad.MinSilenceDuration := 0.5;
  53 + Config.SileroVad.Threshold := 0.5;
  54 + Config.SileroVad.WindowSize := WindowSize;
  55 + Config.NumThreads:= 2;
  56 + Config.Debug:= True;
  57 + Config.Provider:= 'cpu';
  58 + Config.SampleRate := SampleRate;
  59 +
  60 + Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
  61 +end;
  62 +
  63 +function CreateOfflineRecognizerTransducer(
  64 + Tokens: AnsiString;
  65 + Encoder: AnsiString;
  66 + Decoder: AnsiString;
  67 + Joiner: AnsiString;
  68 + ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
  69 +var
  70 + Config: TSherpaOnnxOfflineRecognizerConfig;
  71 +begin
  72 + Initialize(Config);
  73 +
  74 + Config.ModelConfig.Transducer.Encoder := Encoder;
  75 + Config.ModelConfig.Transducer.Decoder := Decoder;
  76 + Config.ModelConfig.Transducer.Joiner := Joiner;
  77 +
  78 + Config.ModelConfig.ModelType := ModelType;
  79 + Config.ModelConfig.Tokens := Tokens;
  80 + Config.ModelConfig.Provider := 'cpu';
  81 + Config.ModelConfig.NumThreads := 2;
  82 + Config.ModelConfig.Debug := False;
  83 +
  84 + Result := TSherpaOnnxOfflineRecognizer.Create(Config);
  85 +end;
  86 +
  87 +function CreateOfflineRecognizerTeleSpeech(
  88 + Tokens: AnsiString;
  89 + TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
  90 +var
  91 + Config: TSherpaOnnxOfflineRecognizerConfig;
  92 +begin
  93 + Initialize(Config);
  94 +
  95 + Config.ModelConfig.TeleSpeechCtc := TeleSpeech;
  96 +
  97 + Config.ModelConfig.Tokens := Tokens;
  98 + Config.ModelConfig.Provider := 'cpu';
  99 + Config.ModelConfig.NumThreads := 2;
  100 + Config.ModelConfig.Debug := False;
  101 +
  102 + Result := TSherpaOnnxOfflineRecognizer.Create(Config);
  103 +end;
  104 +
  105 +function CreateOfflineRecognizerParaformer(
  106 + Tokens: AnsiString;
  107 + Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
  108 +var
  109 + Config: TSherpaOnnxOfflineRecognizerConfig;
  110 +begin
  111 + Initialize(Config);
  112 +
  113 + Config.ModelConfig.Paraformer.Model := Paraformer;
  114 +
  115 + Config.ModelConfig.Tokens := Tokens;
  116 + Config.ModelConfig.Provider := 'cpu';
  117 + Config.ModelConfig.NumThreads := 2;
  118 + Config.ModelConfig.Debug := False;
  119 +
  120 + Result := TSherpaOnnxOfflineRecognizer.Create(Config);
  121 +end;
  122 +
  123 +function CreateOfflineRecognizerSenseVoice(
  124 + Tokens: AnsiString;
  125 + SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
  126 +var
  127 + Config: TSherpaOnnxOfflineRecognizerConfig;
  128 +begin
  129 + Initialize(Config);
  130 +
  131 + Config.ModelConfig.SenseVoice.Model := SenseVoice;
  132 + Config.ModelConfig.SenseVoice.Language := 'auto';
  133 + Config.ModelConfig.SenseVoice.UseItn := True;
  134 + Config.ModelConfig.Tokens := Tokens;
  135 + Config.ModelConfig.Provider := 'cpu';
  136 + Config.ModelConfig.NumThreads := 2;
  137 + Config.ModelConfig.Debug := False;
  138 +
  139 + Result := TSherpaOnnxOfflineRecognizer.Create(Config);
  140 +end;
  141 +
  142 +function CreateOfflineRecognizerWhisper(
  143 + Tokens: AnsiString;
  144 + WhisperEncoder: AnsiString;
  145 + WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
  146 +var
  147 + Config: TSherpaOnnxOfflineRecognizerConfig;
  148 +begin
  149 + Initialize(Config);
  150 +
  151 + Config.ModelConfig.Whisper.Encoder := WhisperEncoder;
  152 + Config.ModelConfig.Whisper.Decoder := WhisperDecoder;
  153 + Config.ModelConfig.Tokens := Tokens;
  154 + Config.ModelConfig.Provider := 'cpu';
  155 + Config.ModelConfig.NumThreads := 2;
  156 + Config.ModelConfig.Debug := False;
  157 +
  158 + Result := TSherpaOnnxOfflineRecognizer.Create(Config);
  159 +end;
  160 +
  161 +constructor TMyInitThread.Create(CreateSuspended : boolean; ModelDirectory: AnsiString);
  162 +begin
  163 + inherited Create(CreateSuspended);
  164 + ModelDir := ModelDirectory;
  165 + FreeOnTerminate := True;
  166 +end;
  167 +
  168 +procedure TMyInitThread.ShowStatus;
  169 +begin
  170 + Form1.UpdateInitStatus(Status);
  171 +end;
  172 +
  173 +procedure TMyInitThread.Execute;
  174 +var
  175 + Msg: AnsiString;
  176 + VadFilename: AnsiString;
  177 + Tokens: AnsiString;
  178 +
  179 + WhisperEncoder: AnsiString;
  180 + WhisperDecoder: AnsiString;
  181 +
  182 + SenseVoice: AnsiString;
  183 +
  184 + Paraformer: AnsiString;
  185 +
  186 + TeleSpeech: AnsiString;
  187 +
  188 + TransducerEncoder: AnsiString; // from icefall
  189 + TransducerDecoder: AnsiString;
  190 + TransducerJoiner: AnsiString;
  191 +
  192 + NeMoTransducerEncoder: AnsiString;
  193 + NeMoTransducerDecoder: AnsiString;
  194 + NeMoTransducerJoiner: AnsiString;
  195 +begin
  196 + VadFilename := ModelDir + 'silero_vad.onnx';
  197 + Tokens := ModelDir + 'tokens.txt';
  198 +
  199 + {
  200 + Please refer to
  201 + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
  202 + for a list of whisper models.
  203 +
  204 + In the code, we use the normalized filenames whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt.
  205 + You need to rename the existing model files.
  206 +
  207 + For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
  208 + mv tiny.en-tokens.txt tokens.txt
  209 +
  210 + mv tiny.en-encoder.onnx whisper-encoder.onnx
  211 + mv tiny.en-decoder.onnx whisper-decoder.onnx
  212 +
  213 + // or use the int8.onnx
  214 +
  215 + mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
  216 + mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
  217 + }
  218 + WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
  219 + WhisperDecoder := ModelDir + 'whisper-decoder.onnx';
  220 +
  221 +
  222 + {
  223 + Please refer to
  224 + https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
  225 + to download models for SenseVoice.
  226 +
  227 + In the code, we use the normalized model name sense-voice.onnx. You have
  228 + to rename the downloaded model files.
  229 +
  230 + For example, you need to use
  231 +
  232 + mv model.onnx sense-voice.onnx
  233 +
  234 + // or use the int8.onnx
  235 + mv model.int8.onnx sense-voice.onnx
  236 + }
  237 +
  238 + SenseVoice := ModelDir + 'sense-voice.onnx';
  239 +
  240 + {
  241 + Please refer to
  242 + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
  243 + to download paraformer models.
  244 +
  245 + Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
  246 + An example is given below for the rename:
  247 +
  248 + cp model.onnx paraformer.onnx
  249 +
  250 + // or use int8.onnx
  251 + cp model.int8.onnx paraformer.onnx
  252 + }
  253 + Paraformer := ModelDir + 'paraformer.onnx';
  254 +
  255 +
  256 + {
  257 + Please refer to
  258 + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
  259 + to download TeleSpeech models.
  260 +
  261 + Note that you have to rename model files after downloading. The following
  262 + is an example
  263 +
  264 + mv model.onnx telespeech.onnx
  265 +
  266 + // or to use int8.onnx
  267 +
  268 + mv model.int8.onnx telespeech.onnx
  269 + }
  270 +
  271 + TeleSpeech := ModelDir + 'telespeech.onnx';
  272 +
  273 +
  274 + {
  275 + Please refer to
  276 + https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  277 + to download an icefall offline transducer model. Note that you need to rename the
  278 + model files to transducer-encoder.onnx, transducer-decoder.onnx, and
  279 + transducer-joiner.onnx
  280 + }
  281 + TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
  282 + TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
  283 + TransducerJoiner := ModelDir + 'transducer-joiner.onnx';
  284 +
  285 + {
  286 + Please visit
  287 + https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  288 + to download a NeMo transducer model.
  289 + }
  290 + NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
  291 + NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
  292 + NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';
  293 +
  294 + if not FileExists(VadFilename) then
  295 + begin
  296 + Status := VadFilename + ' does not exist! Please download it from' +
  297 + sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models';
  298 + Synchronize(@ShowStatus);
  299 + Exit;
  300 + end;
  301 +
  302 + if Form1.Vad = nil then
  303 + begin
  304 + Form1.Vad := CreateVad(VadFilename);
  305 + end;
  306 +
  307 + if not FileExists(Tokens) then
  308 + begin
  309 + Status := Tokens + ' not found. Please download a non-streaming ASR model first!';
  310 + Synchronize(@ShowStatus);
  311 + Exit;
  312 + end;
  313 +
  314 + if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
  315 + begin
  316 + Form1.OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
  317 + Msg := 'Whisper';
  318 + end
  319 + else if FileExists(SenseVoice) then
  320 + begin
  321 + Form1.OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
  322 + Msg := 'SenseVoice';
  323 + end
  324 + else if FileExists(Paraformer) then
  325 + begin
  326 + Form1.OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
  327 + Msg := 'Paraformer';
  328 + end
  329 + else if FileExists(TeleSpeech) then
  330 + begin
  331 + Form1.OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
  332 + Msg := 'TeleSpeech';
  333 + end
  334 + else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
  335 + begin
  336 + Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
  337 + TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
  338 + Msg := 'Zipformer transducer';
  339 + end
  340 + else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
  341 + begin
  342 + Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
  343 + NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
  344 + Msg := 'NeMo transducer';
  345 + end
  346 + else
  347 + begin
  348 + Status := 'Please download at least one non-streaming speech recognition model first.';
  349 + Synchronize(@ShowStatus);
  350 + Exit;
  351 + end;
  352 +
  353 + Status := 'Congratulations! The ' + Msg + ' model is initialized succesfully!';
  354 + Synchronize(@ShowStatus);
  355 +end;
  356 +
  357 +end.
  358 +
@@ -41,6 +41,7 @@ type @@ -41,6 +41,7 @@ type
41 StopTime: Single; 41 StopTime: Single;
42 TotalDuration: Single); 42 TotalDuration: Single);
43 procedure UpdateProgress(StopTime: Single; TotalDuration: Single); 43 procedure UpdateProgress(StopTime: Single; TotalDuration: Single);
  44 + procedure UpdateInitStatus(Status: AnsiString);
44 public 45 public
45 Vad: TSherpaOnnxVoiceActivityDetector; 46 Vad: TSherpaOnnxVoiceActivityDetector;
46 OfflineRecognizer: TSherpaOnnxOfflineRecognizer; 47 OfflineRecognizer: TSherpaOnnxOfflineRecognizer;
@@ -52,7 +53,8 @@ var @@ -52,7 +53,8 @@ var
52 implementation 53 implementation
53 54
54 uses 55 uses
55 - my_worker 56 + my_worker,
  57 + my_init
56 {$IFDEF DARWIN} 58 {$IFDEF DARWIN}
57 ,MacOSAll 59 ,MacOSAll
58 ,CocoaAll 60 ,CocoaAll
@@ -76,128 +78,7 @@ begin @@ -76,128 +78,7 @@ begin
76 end; 78 end;
77 {$ENDIF} 79 {$ENDIF}
78 80
79 -function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;  
80 -var  
81 - Config: TSherpaOnnxVadModelConfig;  
82 -  
83 - SampleRate: Integer;  
84 - WindowSize: Integer;  
85 -begin  
86 - Initialize(Config);  
87 -  
88 - SampleRate := 16000; {Please don't change it unless you know the details}  
89 - WindowSize := 512; {Please don't change it unless you know the details}  
90 -  
91 - Config.SileroVad.Model := VadFilename;  
92 - Config.SileroVad.MinSpeechDuration := 0.5;  
93 - Config.SileroVad.MinSilenceDuration := 0.5;  
94 - Config.SileroVad.Threshold := 0.5;  
95 - Config.SileroVad.WindowSize := WindowSize;  
96 - Config.NumThreads:= 2;  
97 - Config.Debug:= True;  
98 - Config.Provider:= 'cpu';  
99 - Config.SampleRate := SampleRate;  
100 -  
101 - Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);  
102 -end;  
103 -  
104 -function CreateOfflineRecognizerTransducer(  
105 - Tokens: AnsiString;  
106 - Encoder: AnsiString;  
107 - Decoder: AnsiString;  
108 - Joiner: AnsiString;  
109 - ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;  
110 -var  
111 - Config: TSherpaOnnxOfflineRecognizerConfig;  
112 -begin  
113 - Initialize(Config);  
114 -  
115 - Config.ModelConfig.Transducer.Encoder := Encoder;  
116 - Config.ModelConfig.Transducer.Decoder := Decoder;  
117 - Config.ModelConfig.Transducer.Joiner := Joiner;  
118 -  
119 - Config.ModelConfig.ModelType := ModelType;  
120 - Config.ModelConfig.Tokens := Tokens;  
121 - Config.ModelConfig.Provider := 'cpu';  
122 - Config.ModelConfig.NumThreads := 2;  
123 - Config.ModelConfig.Debug := False;  
124 -  
125 - Result := TSherpaOnnxOfflineRecognizer.Create(Config);  
126 -end;  
127 -  
128 -function CreateOfflineRecognizerTeleSpeech(  
129 - Tokens: AnsiString;  
130 - TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;  
131 -var  
132 - Config: TSherpaOnnxOfflineRecognizerConfig;  
133 -begin  
134 - Initialize(Config);  
135 -  
136 - Config.ModelConfig.TeleSpeechCtc := TeleSpeech;  
137 -  
138 - Config.ModelConfig.Tokens := Tokens;  
139 - Config.ModelConfig.Provider := 'cpu';  
140 - Config.ModelConfig.NumThreads := 2;  
141 - Config.ModelConfig.Debug := False;  
142 -  
143 - Result := TSherpaOnnxOfflineRecognizer.Create(Config);  
144 -end;  
145 -  
146 -function CreateOfflineRecognizerParaformer(  
147 - Tokens: AnsiString;  
148 - Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;  
149 -var  
150 - Config: TSherpaOnnxOfflineRecognizerConfig;  
151 -begin  
152 - Initialize(Config);  
153 -  
154 - Config.ModelConfig.Paraformer.Model := Paraformer;  
155 -  
156 - Config.ModelConfig.Tokens := Tokens;  
157 - Config.ModelConfig.Provider := 'cpu';  
158 - Config.ModelConfig.NumThreads := 2;  
159 - Config.ModelConfig.Debug := False;  
160 -  
161 - Result := TSherpaOnnxOfflineRecognizer.Create(Config);  
162 -end;  
163 -  
164 -function CreateOfflineRecognizerSenseVoice(  
165 - Tokens: AnsiString;  
166 - SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;  
167 -var  
168 - Config: TSherpaOnnxOfflineRecognizerConfig;  
169 -begin  
170 - Initialize(Config);  
171 81
172 - Config.ModelConfig.SenseVoice.Model := SenseVoice;  
173 - Config.ModelConfig.SenseVoice.Language := 'auto';  
174 - Config.ModelConfig.SenseVoice.UseItn := True;  
175 - Config.ModelConfig.Tokens := Tokens;  
176 - Config.ModelConfig.Provider := 'cpu';  
177 - Config.ModelConfig.NumThreads := 2;  
178 - Config.ModelConfig.Debug := False;  
179 -  
180 - Result := TSherpaOnnxOfflineRecognizer.Create(Config);  
181 -end;  
182 -  
183 -function CreateOfflineRecognizerWhisper(  
184 - Tokens: AnsiString;  
185 - WhisperEncoder: AnsiString;  
186 - WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;  
187 -var  
188 - Config: TSherpaOnnxOfflineRecognizerConfig;  
189 -begin  
190 - Initialize(Config);  
191 -  
192 - Config.ModelConfig.Whisper.Encoder := WhisperEncoder;  
193 - Config.ModelConfig.Whisper.Decoder := WhisperDecoder;  
194 - Config.ModelConfig.Tokens := Tokens;  
195 - Config.ModelConfig.Provider := 'cpu';  
196 - Config.ModelConfig.NumThreads := 2;  
197 - Config.ModelConfig.Debug := False;  
198 -  
199 - Result := TSherpaOnnxOfflineRecognizer.Create(Config);  
200 -end;  
201 82
202 {$R *.lfm} 83 {$R *.lfm}
203 84
@@ -256,7 +137,7 @@ end; @@ -256,7 +137,7 @@ end;
256 137
257 procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction); 138 procedure TForm1.FormClose(Sender: TObject; var CloseAction: TCloseAction);
258 begin 139 begin
259 - if (MyWorkerThread <> nil) and not MyWorkerThread.Finished then 140 + if (MyWorkerThread <> nil) and (not MyWorkerThread.Finished) then
260 begin 141 begin
261 MyWorkerThread.Terminate; 142 MyWorkerThread.Terminate;
262 MyWorkerThread.WaitFor; 143 MyWorkerThread.WaitFor;
@@ -310,29 +191,35 @@ begin @@ -310,29 +191,35 @@ begin
310 Form1.ResultMemo.Lines.Add(NewResult); 191 Form1.ResultMemo.Lines.Add(NewResult);
311 end; 192 end;
312 193
313 -procedure TForm1.InitBtnClick(Sender: TObject);  
314 -var  
315 - Msg: AnsiString;  
316 - ModelDir: AnsiString;  
317 - VadFilename: AnsiString;  
318 - Tokens: AnsiString;  
319 -  
320 - WhisperEncoder: AnsiString;  
321 - WhisperDecoder: AnsiString;  
322 -  
323 - SenseVoice: AnsiString;  
324 -  
325 - Paraformer: AnsiString; 194 +procedure TForm1.UpdateInitStatus(Status: AnsiString);
  195 +begin
  196 + if EndsStr('model is initialized succesfully!', Status) then
  197 + begin
  198 + Form1.ResultMemo.Lines.Add(Status);
  199 + Form1.ResultMemo.Lines.Add('Please select a 16000Hz wave file to generate subtiles');
  200 + Form1.ResultMemo.Lines.Add('You can download some test wave files from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models');
  201 + Form1.ResultMemo.Lines.Add('For instance:');
  202 + Form1.ResultMemo.Lines.Add(' Chinese test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav');
  203 + Form1.ResultMemo.Lines.Add(' English test wave: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav');
  204 + FileNameEdt.Enabled := True;
  205 + SelectFileBtn.Enabled := True;
326 206
327 - TeleSpeech: AnsiString; 207 + end
  208 + else
  209 + begin
  210 + ShowMessage(Status);
  211 + Form1.ResultMemo.Lines.Clear();
  212 + Form1.ResultMemo.Lines.Add('Please refer to');
  213 + Form1.ResultMemo.Lines.Add('https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-models');
  214 + Form1.ResultMemo.Lines.Add('for how to download models');
328 215
329 - TransducerEncoder: AnsiString; // from icefall  
330 - TransducerDecoder: AnsiString;  
331 - TransducerJoiner: AnsiString; 216 + InitBtn.Enabled := True;
  217 + end;
  218 +end;
332 219
333 - NeMoTransducerEncoder: AnsiString;  
334 - NeMoTransducerDecoder: AnsiString;  
335 - NeMoTransducerJoiner: AnsiString; 220 +procedure TForm1.InitBtnClick(Sender: TObject);
  221 +var
  222 + ModelDir: AnsiString;
336 begin 223 begin
337 {$IFDEF DARWIN} 224 {$IFDEF DARWIN}
338 ModelDir := GetResourcesPath; 225 ModelDir := GetResourcesPath;
@@ -340,162 +227,10 @@ begin @@ -340,162 +227,10 @@ begin
340 ModelDir := './'; 227 ModelDir := './';
341 {$ENDIF} 228 {$ENDIF}
342 229
343 - VadFilename := ModelDir + 'silero_vad.onnx';  
344 - Tokens := ModelDir + 'tokens.txt';  
345 -  
346 - {  
347 - Please refer to  
348 - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models  
349 - for a list of whisper models.  
350 -  
351 - In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt  
352 - You need to rename the existing model files.  
353 -  
354 - For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do  
355 - mv tiny.en-tokens.txt tokens.txt  
356 -  
357 - mv tiny.en-encoder.onnx whisper-encoder.onnx  
358 - mv tiny.en-decoder.onnx whisper-decoder.onnx  
359 -  
360 - // or use the int8.onnx  
361 -  
362 - mv tiny.en-encoder.int8.onnx whisper-encoder.onnx  
363 - mv tiny.en-decoder.int8.onnx whisper-decoder.onnx  
364 - }  
365 - WhisperEncoder := ModelDir + 'whisper-encoder.onnx';  
366 - WhisperDecoder := ModelDir + 'whisper-decoder.onnx';  
367 -  
368 -  
369 - {  
370 - Please refer to  
371 - https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models  
372 - to download models for SenseVoice.  
373 -  
374 - In the code, we use the normalized model name sense-voice.onnx. You have  
375 - to rename the downloaded model files.  
376 -  
377 - For example, you need to use  
378 -  
379 - mv model.onnx sense-voice.onnx  
380 -  
381 - // or use the int8.onnx  
382 - mv model.int8.onnx sense-voice.onnx  
383 - }  
384 -  
385 - SenseVoice := ModelDir + 'sense-voice.onnx';  
386 -  
387 - {  
388 - Please refer to  
389 - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html  
390 - to download paraformer models.  
391 -  
392 - Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.  
393 - An example is given below for the rename:  
394 -  
395 - cp model.onnx paraformer.onnx  
396 -  
397 - // or use int8.onnx  
398 - cp model.int8.onnx paraformer.onnx  
399 - }  
400 - Paraformer := ModelDir + 'paraformer.onnx';  
401 -  
402 -  
403 - {  
404 - please refer to  
405 - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html  
406 - to download TeleSpeech models.  
407 -  
408 - Note that you have to rename model files after downloading. The following  
409 - is an example  
410 -  
411 - mv model.onnx telespeech.onnx  
412 -  
413 - // or to use int8.onnx  
414 -  
415 - mv model.int8.onnx telespeech.onnx  
416 - }  
417 -  
418 - TeleSpeech := ModelDir + 'telespeech.onnx';  
419 -  
420 -  
421 - {  
422 - Please refer to  
423 - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html  
424 - to download an icefall offline transducer model. Note that you need to rename the  
425 - model files to transducer-encoder.onnx, transducer-decoder.onnx, and  
426 - transducer-joiner.onnx  
427 - }  
428 - TransducerEncoder := ModelDir + 'transducer-encoder.onnx';  
429 - TransducerDecoder := ModelDir + 'transducer-decoder.onnx';  
430 - TransducerJoiner := ModelDir + 'transducer-joiner.onnx';  
431 -  
432 - {  
433 - Please visit  
434 - https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models  
435 - to donwload a NeMo transducer model.  
436 - }  
437 - NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';  
438 - NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';  
439 - NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';  
440 -  
441 - if not FileExists(VadFilename) then  
442 - begin  
443 - ShowMessage(VadFilename + ' does not exist! Please download it from' +  
444 - sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models'  
445 - );  
446 - Exit;  
447 - end;  
448 -  
449 - Self.Vad := CreateVad(VadFilename);  
450 -  
451 - if not FileExists(Tokens) then  
452 - begin  
453 - ShowMessage(Tokens + ' not found. Please download a non-streaming ASR model first!');  
454 - Exit;  
455 - end;  
456 -  
457 - if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then  
458 - begin  
459 - OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);  
460 - Msg := 'Whisper';  
461 - end  
462 - else if FileExists(SenseVoice) then  
463 - begin  
464 - OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);  
465 - Msg := 'SenseVoice';  
466 - end  
467 - else if FileExists(Paraformer) then  
468 - begin  
469 - OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);  
470 - Msg := 'Paraformer';  
471 - end  
472 - else if FileExists(TeleSpeech) then  
473 - begin  
474 - OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);  
475 - Msg := 'TeleSpeech';  
476 - end  
477 - else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then  
478 - begin  
479 - OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,  
480 - TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');  
481 - Msg := 'Zipformer transducer';  
482 - end  
483 - else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then  
484 - begin  
485 - OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,  
486 - NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');  
487 - Msg := 'NeMo transducer';  
488 - end  
489 - else  
490 - begin  
491 - ShowMessage('Please download at least one non-streaming speech recognition model first.');  
492 - Exit;  
493 - end;  
494 -  
495 - MessageDlg('Congrat! The ' + Msg + ' model is initialized succesfully!', mtInformation, [mbOk], 0);  
496 - FileNameEdt.Enabled := True;  
497 - SelectFileBtn.Enabled := True;  
498 - InitBtn.Enabled := False; 230 + Form1.ResultMemo.Lines.Clear();
  231 + ResultMemo.Lines.Add('Initializing the model. Please wait...');
  232 + MyInitThread := TMyInitThread.Create(False, ModelDir);
  233 + InitBtn.Enabled := False;
499 end; 234 end;
500 235
501 end. 236 end.
@@ -69,7 +69,7 @@ bool SileroVadModelConfig::Validate() const { @@ -69,7 +69,7 @@ bool SileroVadModelConfig::Validate() const {
69 std::string SileroVadModelConfig::ToString() const { 69 std::string SileroVadModelConfig::ToString() const {
70 std::ostringstream os; 70 std::ostringstream os;
71 71
72 - os << "SilerVadModelConfig("; 72 + os << "SileroVadModelConfig(";
73 os << "model=\"" << model << "\", "; 73 os << "model=\"" << model << "\", ";
74 os << "threshold=" << threshold << ", "; 74 os << "threshold=" << threshold << ", ";
75 os << "min_silence_duration=" << min_silence_duration << ", "; 75 os << "min_silence_duration=" << min_silence_duration << ", ";
@@ -98,6 +98,7 @@ type @@ -98,6 +98,7 @@ type
98 destructor Destroy; override; 98 destructor Destroy; override;
99 procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); 99 procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
100 procedure InputFinished; 100 procedure InputFinished;
  101 + property GetHandle: Pointer Read Handle;
101 end; 102 end;
102 103
103 TSherpaOnnxOnlineRecognizer = class 104 TSherpaOnnxOnlineRecognizer = class
@@ -116,6 +117,7 @@ type @@ -116,6 +117,7 @@ type
116 function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean; 117 function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
117 function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; 118 function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
118 property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config; 119 property Config: TSherpaOnnxOnlineRecognizerConfig Read _Config;
  120 + property GetHandle: Pointer Read Handle;
119 end; 121 end;
120 122
121 TSherpaOnnxOfflineTransducerModelConfig = record 123 TSherpaOnnxOfflineTransducerModelConfig = record
@@ -213,6 +215,7 @@ type @@ -213,6 +215,7 @@ type
213 constructor Create(P: Pointer); 215 constructor Create(P: Pointer);
214 destructor Destroy; override; 216 destructor Destroy; override;
215 procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); 217 procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
  218 + property GetHandle: Pointer Read Handle;
216 end; 219 end;
217 220
218 TSherpaOnnxOfflineRecognizer = class 221 TSherpaOnnxOfflineRecognizer = class
@@ -226,6 +229,7 @@ type @@ -226,6 +229,7 @@ type
226 procedure Decode(Stream: TSherpaOnnxOfflineStream); 229 procedure Decode(Stream: TSherpaOnnxOfflineStream);
227 function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; 230 function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
228 property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; 231 property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
  232 + property GetHandle: Pointer Read Handle;
229 end; 233 end;
230 234
231 TSherpaOnnxSileroVadModelConfig = record 235 TSherpaOnnxSileroVadModelConfig = record
@@ -262,6 +266,7 @@ type @@ -262,6 +266,7 @@ type
262 procedure Reset; 266 procedure Reset;
263 function Size: Integer; 267 function Size: Integer;
264 function Head: Integer; 268 function Head: Integer;
  269 + property GetHandle: Pointer Read Handle;
265 end; 270 end;
266 271
267 TSherpaOnnxSpeechSegment = record 272 TSherpaOnnxSpeechSegment = record
@@ -286,6 +291,7 @@ type @@ -286,6 +291,7 @@ type
286 procedure Reset; 291 procedure Reset;
287 procedure Flush; 292 procedure Flush;
288 property Config: TSherpaOnnxVadModelConfig Read _Config; 293 property Config: TSherpaOnnxVadModelConfig Read _Config;
  294 + property GetHandle: Pointer Read Handle;
289 end; 295 end;
290 296
291 { It supports reading a single channel wave with 16-bit encoded samples. 297 { It supports reading a single channel wave with 16-bit encoded samples.