Nickolay V. Shmyrev
Committed by GitHub

Proper convolution mode for fast GPU processing (#350)

... ... @@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
sess_opts.SetIntraOpNumThreads(num_threads);
sess_opts.SetInterOpNumThreads(num_threads);
// Other possible options
// sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED);
// sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);
// sess_opts.EnableProfiling("profile");
switch (p) {
case Provider::kCPU:
break; // nothing to do for the CPU provider
... ... @@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
// The CUDA provider is available, proceed with setting the options
OrtCUDAProviderOptions options;
options.device_id = 0;
// Default OrtCudnnConvAlgoSearchExhaustive is extremely slow
options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic;
// set more options on need
sess_opts.AppendExecutionProvider_CUDA(options);
} else {
... ...