Nickolay V. Shmyrev
Committed by GitHub

Proper convolution mode for fast GPU processing (#350)

@@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, @@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
25 sess_opts.SetIntraOpNumThreads(num_threads); 25 sess_opts.SetIntraOpNumThreads(num_threads);
26 sess_opts.SetInterOpNumThreads(num_threads); 26 sess_opts.SetInterOpNumThreads(num_threads);
27 27
  28 + // Other possible options
  29 + // sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
  30 + // sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
  31 + // sess_opts.EnableProfiling("profile");
  32 +
28 switch (p) { 33 switch (p) {
29 case Provider::kCPU: 34 case Provider::kCPU:
30 break; // nothing to do for the CPU provider 35 break; // nothing to do for the CPU provider
@@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, @@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
36 // The CUDA provider is available, proceed with setting the options 41 // The CUDA provider is available, proceed with setting the options
37 OrtCUDAProviderOptions options; 42 OrtCUDAProviderOptions options;
38 options.device_id = 0; 43 options.device_id = 0;
  44 + // Default OrtCudnnConvAlgoSearchExhaustive is extremely slow
  45 + options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
39 // set more options on need 46 // set more options on need
40 sess_opts.AppendExecutionProvider_CUDA(options); 47 sess_opts.AppendExecutionProvider_CUDA(options);
41 } else { 48 } else {