mxnet
cudnn_cxx.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef MXNET_COMMON_CUDA_CUDNN_CXX_H_
25 #define MXNET_COMMON_CUDA_CUDNN_CXX_H_
26 
27 #include <mxnet/base.h>
28 #if MXNET_USE_CUDNN == 1
29 
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>

#if !defined(__CUDACC__)  // Can be removed when CUDA 10 support is dropped.
#include <optional>  // NOLINT(build/include_order)
#endif  // !defined(__CUDACC__)

#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include "utils.h"
46 
47 STATIC_ASSERT_CUDNN_VERSION_GE(8002);
48 
49 namespace mxnet {
50 namespace cudnn_cxx {
51 
/*!
 * \brief Deleter for cudnnBackendDescriptor_t, for use with std::unique_ptr.
 *
 * Declaring `pointer` makes std::unique_ptr store the raw
 * cudnnBackendDescriptor_t handle itself rather than a pointer to it.
 */
struct DescriptorDestroyer {
  using pointer = cudnnBackendDescriptor_t;

  void operator()(cudnnBackendDescriptor_t desc) {
    // Non-fatal variant: an error from the destroy call is reported but does not abort,
    // since deleters commonly run during cleanup/unwinding.
    CUDNN_CALL_NONFATAL(cudnnBackendDestroyDescriptor(desc));
  }
};

/*! \brief Owning RAII handle for a cuDNN backend descriptor. */
using Descriptor = std::unique_ptr<cudnnBackendDescriptor_t, DescriptorDestroyer>;
61 
/*!
 * \brief Non-owning view of a backend descriptor.
 *
 * Mirrors Descriptor's get() interface so it can be passed to SetAttr() overloads
 * without transferring ownership. The referenced Descriptor must outlive this view.
 */
struct WeakDescriptor {
  cudnnBackendDescriptor_t desc = nullptr;  // borrowed handle, not destroyed here

  explicit WeakDescriptor(const Descriptor& other) : desc(other.get()) {}
  cudnnBackendDescriptor_t get() const {
    return desc;
  }
};
70 
/*!
 * \brief Maps a C++ value type to the matching cudnnBackendAttributeType_t tag.
 *
 * Each specialization provides a static `type` constant which the SetAttr()/GetAttr()
 * templates pass to cudnnBackendSetAttribute()/cudnnBackendGetAttribute().
 * The primary template is intentionally left undefined, so using an unsupported
 * value type is a compile-time error.
 */
template <typename T>
struct AttrType;

template <>
struct AttrType<int64_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_INT64;
};

template <>
struct AttrType<void*> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_VOID_PTR;
};

template <>
struct AttrType<float> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_FLOAT;
};

template <>
struct AttrType<double> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_DOUBLE;
};

template <>
struct AttrType<cudnnHandle_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_HANDLE;
};

template <>
struct AttrType<bool> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_BOOLEAN;
};

template <>
struct AttrType<cudnnDataType_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_DATA_TYPE;
};

template <>
struct AttrType<cudnnConvolutionMode_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_CONVOLUTION_MODE;
};

template <>
struct AttrType<cudnnNanPropagation_t> {
  // "PROPOGATION" [sic] is cuDNN's own spelling of this enumerator.
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_NAN_PROPOGATION;
};

template <>
struct AttrType<cudnnPointwiseMode_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_POINTWISE_MODE;
};

template <>
struct AttrType<cudnnBackendHeurMode_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_HEUR_MODE;
};

template <>
struct AttrType<cudnnBackendNumericalNote_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_NUMERICAL_NOTE;
};

// Enumerators below only exist in newer cuDNN releases.
#if CUDNN_VERSION >= 8100
template <>
struct AttrType<cudnnReduceTensorOp_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_REDUCTION_OPERATOR_TYPE;
};
#if CUDNN_VERSION >= 8200
template <>
struct AttrType<cudnnBackendBehaviorNote_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_BEHAVIOR_NOTE;
};
#endif  // CUDNN_VERSION >= 8200
#endif  // CUDNN_VERSION >= 8100

template <>
struct AttrType<cudnnBackendKnobType_t> {
  static constexpr cudnnBackendAttributeType_t type = CUDNN_TYPE_KNOB_TYPE;
};
151 
/*! \brief Set a descriptor-valued attribute. */
void SetAttr(const Descriptor& desc, cudnnBackendAttributeName_t name, const Descriptor& val);
/*! \brief Set a descriptor-valued attribute from a non-owning view. */
void SetAttr(const Descriptor& desc, cudnnBackendAttributeName_t name, const WeakDescriptor& val);
/*! \brief Set an attribute holding an array of descriptors. */
void SetAttr(const Descriptor& desc,
             cudnnBackendAttributeName_t name,
             const std::vector<Descriptor>& val);

/*! \brief Set a scalar attribute; the cuDNN element type is deduced via AttrType<T>. */
template <typename T>
void SetAttr(const Descriptor& desc, cudnnBackendAttributeName_t name, T val) {
  CUDNN_CALL(cudnnBackendSetAttribute(desc.get(), name, AttrType<T>::type, 1, &val));
}

/*! \brief Set a variable-length array attribute. */
template <typename T>
void SetAttr(const Descriptor& desc, cudnnBackendAttributeName_t name, const std::vector<T>& val) {
  CUDNN_CALL(cudnnBackendSetAttribute(desc.get(), name, AttrType<T>::type, val.size(), val.data()));
}

/*! \brief Set a fixed-length array attribute. */
template <typename T, size_t N>
void SetAttr(const Descriptor& desc,
             cudnnBackendAttributeName_t name,
             const std::array<T, N>& val) {
  CUDNN_CALL(cudnnBackendSetAttribute(desc.get(), name, AttrType<T>::type, val.size(), val.data()));
}
174 
/*! \brief Recursion terminator for SetAttrs() below. */
inline void SetAttrs(const Descriptor& desc) {}

/*!
 * \brief Set several attributes at once, given as (name, value) argument pairs.
 *
 * Example: SetAttrs(desc, NAME_A, value_a, NAME_B, value_b);
 * Each pair is dispatched to the matching SetAttr() overload.
 */
template <typename T, typename... Attrs>
void SetAttrs(const Descriptor& desc, cudnnBackendAttributeName_t name, T&& val, Attrs&&... rest) {
  SetAttr(desc, name, std::forward<T>(val));
  SetAttrs(desc, std::forward<Attrs>(rest)...);
}
182 
/*! \brief Create `n` raw (unmanaged) backend descriptors of the given type. */
std::vector<cudnnBackendDescriptor_t> MakeRawDescriptors(size_t n,
                                                         cudnnBackendDescriptorType_t type);

/*! \brief Create an owned backend descriptor of the given type. */
Descriptor Make(cudnnBackendDescriptorType_t type);

/*!
 * \brief Create a descriptor and set the given (name, value) attribute pairs.
 * \note The descriptor is not finalized; use MakeFinalized() for that.
 */
template <typename... Attrs>
Descriptor Make(cudnnBackendDescriptorType_t type, Attrs&&... attrs) {
  auto desc = Make(type);
  SetAttrs(desc, std::forward<Attrs>(attrs)...);
  return desc;
}
194 
/*!
 * \brief Create a descriptor, set the given (name, value) attribute pairs, and finalize it.
 */
template <typename... Attrs>
Descriptor MakeFinalized(cudnnBackendDescriptorType_t type, Attrs&&... attrs) {
  auto desc = Make(type, std::forward<Attrs>(attrs)...);
  CUDNN_CALL(cudnnBackendFinalize(desc.get()));
  return desc;
}
201 
202 template <typename T>
203 T GetAttr(const Descriptor& desc, cudnnBackendAttributeName_t name) {
204  T ret{};
205  int64_t ret_count = 0;
206  CUDNN_CALL(cudnnBackendGetAttribute(desc.get(), name, AttrType<T>::type, 1, &ret_count, &ret));
207  CHECK_EQ(ret_count, 1);
208  return ret;
209 }
210 
211 template <typename T>
212 std::vector<T> GetAllAttrs(const Descriptor& desc, cudnnBackendAttributeName_t name) {
213  int64_t count = 0;
214  CUDNN_CALL(cudnnBackendGetAttribute(desc.get(), name, AttrType<T>::type, 0, &count, nullptr));
215  std::vector<T> ret(count);
216  CUDNN_CALL(cudnnBackendGetAttribute(
217  desc.get(), name, AttrType<T>::type, ret.size(), &count, ret.data()));
218  return ret;
219 }
220 
221 template <typename T>
222 std::vector<T> GetSomeAttrs(size_t max_n,
223  const Descriptor& desc,
224  cudnnBackendAttributeName_t name) {
225  int64_t count = 0;
226  std::vector<T> ret(max_n);
227  CUDNN_CALL(cudnnBackendGetAttribute(
228  desc.get(), name, AttrType<T>::type, ret.size(), &count, ret.data()));
229  ret.resize(count);
230  return ret;
231 }
232 
/*! \brief Read a descriptor-valued attribute; `type` selects the backend descriptor type. */
Descriptor GetAttr(const Descriptor& desc,
                   cudnnBackendAttributeName_t name,
                   cudnnBackendDescriptorType_t type);

/*! \brief Read all elements of a descriptor-array attribute. */
std::vector<Descriptor> GetAllAttrs(const Descriptor& desc,
                                    cudnnBackendAttributeName_t name,
                                    cudnnBackendDescriptorType_t type);

/*! \brief Read up to `max_n` elements of a descriptor-array attribute. */
std::vector<Descriptor> GetSomeAttrs(size_t max_n,
                                     const Descriptor& desc,
                                     cudnnBackendAttributeName_t name,
                                     cudnnBackendDescriptorType_t type);
245 
// Order sets layout, as a permutation of dims, with N,C,<spatial dims> being identity.
/*!
 * \brief Compute packed (fully-contiguous) strides for `dims` laid out in `order`.
 * \param order permutation of [0, dims.size()) giving the layout, innermost dimension last.
 * \param dims  dimension extents, indexed the same way as `order`'s values.
 * \return per-dimension strides; the innermost (last in `order`) dimension has stride 1.
 *
 * Fails (via CHECK_EQ) if `order` and `dims` differ in length.
 */
template <typename T>
std::vector<T> PackedStrides(const std::vector<size_t>& order, const std::vector<T>& dims) {
  CHECK_EQ(order.size(), dims.size());
  const size_t n = dims.size();
  std::vector<T> ret(n, 1);
  // Guard: with n == 0 the original `n - 1` loop bound wraps around (size_t underflow)
  // and indexes out of bounds; with n == 1 there is nothing to accumulate.
  if (n < 2)
    return ret;
  // Walk from the second-innermost dimension outward: each stride is the next-inner
  // dimension's extent times that dimension's stride.
  for (size_t i = n - 1; i-- > 0;)
    ret[order[i]] = dims[order[i + 1]] * ret[order[i + 1]];
  return ret;
}
255 
// Given an engine config's `notes`, return whether that config is compatible, i.e. does
// the config have all of the required notes and none of the notes that are being excluded.
/*!
 * \param notes         notes reported for an engine config.
 * \param require_notes notes that must all be present in `notes`.
 * \param exclude_notes notes that must all be absent from `notes`.
 */
template <typename Note>
inline bool IsCompatible(const std::vector<Note>& notes,
                         const std::vector<Note>& require_notes,
                         const std::vector<Note>& exclude_notes) {
  auto has_note = [&notes](const Note& n) {
    return std::find(notes.begin(), notes.end(), n) != notes.end();
  };
  // All required notes present, and no excluded note present.
  return std::all_of(require_notes.begin(), require_notes.end(), has_note) &&
         std::none_of(exclude_notes.begin(), exclude_notes.end(), has_note);
}
274 
// Execution plans are returned in the order of cuDNN heuristics, i.e. from best to worst.
// - h_mode / handle / op_graph: heuristics mode, cuDNN handle and operation graph to plan for.
// - workspace_limit: plans needing more workspace than this are filtered out.
// - max_workspace is an out parameter - the maximum workspace requirement among returned plans,
//   may be nullptr if not needed.
// - excl_engines: engine ids to skip entirely.
// - req_numeric / excl_numeric (and req_behavior / excl_behavior with cuDNN >= 8.2): notes an
//   engine config must have / must not have to be kept (cf. IsCompatible()).
// - verbose_filter: presumably enables logging of filtered-out configs - see the .cc file.
std::vector<Descriptor> GetPlans(cudnnBackendHeurMode_t h_mode,
                                 cudnnHandle_t handle,
                                 const Descriptor& op_graph,
                                 size_t workspace_limit,
                                 size_t* max_workspace,
                                 const std::unordered_set<int64_t>& excl_engines,
                                 const std::vector<cudnnBackendNumericalNote_t>& req_numeric,
                                 const std::vector<cudnnBackendNumericalNote_t>& excl_numeric,
#if CUDNN_VERSION >= 8200
                                 const std::vector<cudnnBackendBehaviorNote_t>& req_behavior,
                                 const std::vector<cudnnBackendBehaviorNote_t>& excl_behavior,
#endif  // CUDNN_VERSION >= 8200
                                 bool verbose_filter);
291 
#if !defined(__CUDACC__)  // Can be removed when CUDA 10 support is dropped.

// Defines a sampling algorithm.
// Returns an aggregate value, to be used as a metric for time comparison, or std::nullopt to
// perform another time measurement.
using Sampler = std::function<std::optional<float>(float)>;

// Return a sampler that after `n` trials returns the average.
// Before tallying trials, `warmups` trials are first ignored.
// If ever a trial that exceeds `max_cutoff_msec` is encountered (even during warmup),
// that trial is tallied and the sampling ends with the then-current trial average.
Sampler MakeAvgSampler(size_t n, float max_cutoff_msec = 1000.0, size_t warmups = 1);

// Outcome of timing a single execution plan in FindTopPlans().
struct FindResult {
  Descriptor plan;  // the timed execution plan
  size_t heur_i;    // the plan's index in the input (heuristics) order
  float time;       // aggregate time metric produced by the sampler
};

// Executes and times the plans. The results are returned in the order from best to worst.
std::vector<FindResult> FindTopPlans(std::vector<Descriptor>&& plans,
                                     size_t max_results,
                                     cudnnHandle_t handle,
                                     const Descriptor& var_pack,
                                     Sampler sampler);
#endif  // !defined(__CUDACC__)

// Human-readable description of an execution plan, for logging/debugging.
std::string PlanStr(const Descriptor& plan);
320 
321 } // namespace cudnn_cxx
322 } // namespace mxnet
323 
324 #endif // MXNET_USE_CUDNN == 1
325 
326 #endif // MXNET_COMMON_CUDA_CUDNN_CXX_H_
mxnet
namespace of mxnet
Definition: api_registry.h:33
base.h
configuration of MXNet as well as basic data structure.