Committed by
GitHub
Proper convolution mode for fast GPU processing (#350)
正在显示
1 个修改的文件
包含
7 行增加
和
0 行删除
| @@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, | @@ -25,6 +25,11 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, | ||
| 25 | sess_opts.SetIntraOpNumThreads(num_threads); | 25 | sess_opts.SetIntraOpNumThreads(num_threads); |
| 26 | sess_opts.SetInterOpNumThreads(num_threads); | 26 | sess_opts.SetInterOpNumThreads(num_threads); |
| 27 | 27 | ||
| 28 | + // Other possible options | ||
| 29 | + // sess_opts.SetGraphOptimizationLevel(ORT_ENABLE_EXTENDED); | ||
| 30 | + // sess_opts.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE); | ||
| 31 | + // sess_opts.EnableProfiling("profile"); | ||
| 32 | + | ||
| 28 | switch (p) { | 33 | switch (p) { |
| 29 | case Provider::kCPU: | 34 | case Provider::kCPU: |
| 30 | break; // nothing to do for the CPU provider | 35 | break; // nothing to do for the CPU provider |
| @@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, | @@ -36,6 +41,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, | ||
| 36 | // The CUDA provider is available, proceed with setting the options | 41 | // The CUDA provider is available, proceed with setting the options |
| 37 | OrtCUDAProviderOptions options; | 42 | OrtCUDAProviderOptions options; |
| 38 | options.device_id = 0; | 43 | options.device_id = 0; |
| 44 | + // The default, OrtCudnnConvAlgoSearchExhaustive, is extremely slow; use the heuristic search instead | ||
| 45 | + options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchHeuristic; | ||
| 39 | // set more options as needed | 46 | // set more options as needed |
| 40 | sess_opts.AppendExecutionProvider_CUDA(options); | 47 | sess_opts.AppendExecutionProvider_CUDA(options); |
| 41 | } else { | 48 | } else { |
-
请 注册 或 登录 后发表评论