utils.h
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file utils.h
 * \brief Basic utility functions.
 */
#ifndef MXNET_COMMON_UTILS_H_
#define MXNET_COMMON_UTILS_H_
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <nnvm/graph.h>
#include <mxnet/engine.h>
#include <mxnet/ndarray.h>
#include <mxnet/op_attr_types.h>
#include <mxnet/graph_attr_types.h>
#include <nnvm/graph_attr_types.h>

#include <memory>
#include <vector>
#include <type_traits>
#include <utility>
#include <random>
#include <string>
#include <thread>
#include <algorithm>
#include <functional>

#include "../operator/mxnet_op.h"
namespace mxnet {
namespace common {

/*!
 * \brief IndPtr should be non-negative, in non-decreasing order, start with 0
 *        and end with a value equal to the size of the indices array.
 */
struct csr_indptr_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indptr,
                                  const nnvm::dim_t end, const nnvm::dim_t idx_size) {
    if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
        (i == 0 && indptr[i] != 0) ||
        (i == end - 1 && indptr[end] != idx_size))
      *out = kCSRIndPtrErr;
  }
};

/*!
 * \brief Indices should be non-negative, less than the number of columns
 *        and in ascending order per row.
 */
struct csr_idx_check {
  template<typename DType, typename IType, typename RType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const RType* indptr, const nnvm::dim_t ncols) {
    for (RType j = indptr[i]; j < indptr[i+1]; j++) {
      if (idx[j] >= ncols || idx[j] < 0 ||
          (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
        *out = kCSRIdxErr;
        break;
      }
    }
  }
};

/*!
 * \brief Indices of RSPNDArray should be non-negative, less than the size
 *        of the first dimension and in ascending order.
 */
struct rsp_idx_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const nnvm::dim_t end, const nnvm::dim_t nrows) {
    if ((i < end && idx[i+1] <= idx[i]) || idx[i] < 0 || idx[i] >= nrows)
      *out = kRSPIdxErr;
  }
};

template<typename xpu>
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check);

/*!
 * \brief Check the validity of CSRNDArray.
 * \param rctx Execution context.
 * \param input Input NDArray of CSRStorage.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kCSRStorage)
    << "CheckFormatCSRImpl is for CSRNDArray";
  const TShape shape = input.shape();
  const TShape idx_shape = input.aux_shape(csr::kIdx);
  const TShape indptr_shape = input.aux_shape(csr::kIndPtr);
  const TShape storage_shape = input.storage_shape();
  if ((shape.ndim() != 2) ||
      (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
      (indptr_shape[0] != shape[0] + 1) ||
      (idx_shape[0] != storage_shape[0])) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kCSRShapeErr;
    });
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
        MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
          mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
          NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                    rctx.get_ctx(), false, err_cpu.type_flag_);
          TBlob val_xpu = ret_xpu.data();
          Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
          Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
            input.aux_data(csr::kIndPtr).dptr<RType>(),
            indptr_shape[0] - 1, idx_shape[0]);
          // no need to check indices if indices are empty
          if (idx_shape[0] != 0) {
            Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
              input.aux_data(csr::kIdx).dptr<IType>(),
              input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
          }
          mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                        val_xpu.get<xpu, 1, DType>(s), s);
        });
      });
    });
  }
}

/*!
 * \brief Check the validity of RowSparseNDArray.
 * \param rctx Execution context.
 * \param input Input NDArray of RowSparseStorage.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kRowSparseStorage)
    << "CheckFormatRSPImpl is for RSPNDArray";
  const TShape idx_shape = input.aux_shape(rowsparse::kIdx);
  if (idx_shape[0] != input.storage_shape()[0]) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kRSPShapeErr;
    });
    return;
  }
  if (idx_shape[0] == 0) {
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
        mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
        NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                  rctx.get_ctx(), false, err_cpu.type_flag_);
        TBlob val_xpu = ret_xpu.data();
        Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());

        Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
          val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
          idx_shape[0] - 1, input.shape()[0]);
        mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                      val_xpu.get<xpu, 1, DType>(s), s);
      });
    });
  }
}

template<typename xpu>
void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
                     const TBlob &err_cpu, const bool full_check) {
  int stype = input.storage_type();
  if (stype == kCSRStorage) {
    CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kRowSparseStorage) {
    CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kDefaultStorage) {
    // no-op for default storage
  } else {
    LOG(FATAL) << "Unknown storage type " << stype;
  }
}

template<typename xpu>
void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);

/*! \brief returns true if all storage types in `vstorage` are the same as
 *         target `stype`. false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype) {
  if (!vstorage.empty()) {
    for (const auto& i : vstorage) {
      if (i != stype) return false;
    }
    return true;
  }
  return false;
}

/*! \brief returns true if all storage types in `vstorage` are the same as
 *         target `stype1` or `stype2`. Sets `has_both` if both are present.
 *         false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (has_both) {
    *has_both = false;
  }
  if (!vstorage.empty()) {
    uint8_t has = 0;
    for (const auto i : vstorage) {
      if (i == stype1) {
        has |= 1;
      } else if (i == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}

/*! \brief returns true if the storage types of arrays in `ndarrays`
 *         are the same as target `stype`. false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype) {
  if (!ndarrays.empty()) {
    for (const auto& nd : ndarrays) {
      if (nd.storage_type() != stype) {
        return false;
      }
    }
    return true;
  }
  return false;
}

/*! \brief returns true if the storage types of arrays in `ndarrays`
 *         are the same as targets `stype1` or `stype2`. Sets `has_both`
 *         if both are present. false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (has_both) {
    *has_both = false;
  }
  if (!ndarrays.empty()) {
    uint8_t has = 0;
    for (const auto& nd : ndarrays) {
      const NDArrayStorageType stype = nd.storage_type();
      if (stype == stype1) {
        has |= 1;
      } else if (stype == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}
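
// Usage sketch (illustrative comment, not part of the original header): the
// ContainsOnlyStorage overloads answer "do all inputs share a sparse format?"
// before picking a kernel path. The `inputs` vector here is hypothetical.
//
//   std::vector<NDArray> inputs = /* operator inputs */;
//   bool has_both = false;
//   if (ContainsOnlyStorage(inputs, kCSRStorage)) {
//     // all-CSR fast path
//   } else if (ContainsOnlyStorage(inputs, kCSRStorage, kRowSparseStorage, &has_both)) {
//     // mixed sparse path; has_both reports whether both stypes actually occur
//   }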

/*! \brief get string representation of dispatch_mode */
inline std::string dispatch_mode_string(const DispatchMode x) {
  switch (x) {
    case DispatchMode::kFCompute:
      return "fcompute";
    case DispatchMode::kFComputeEx:
      return "fcompute_ex";
    case DispatchMode::kFComputeFallback:
      return "fcompute_fallback";
    case DispatchMode::kVariable:
      return "variable";
    case DispatchMode::kUndefined:
      return "undefined";
  }
  return "unknown";
}

/*! \brief get string representation of storage_type */
inline std::string stype_string(const int x) {
  switch (x) {
    case kDefaultStorage:
      return "default";
    case kCSRStorage:
      return "csr";
    case kRowSparseStorage:
      return "row_sparse";
  }
  return "unknown";
}
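
// Illustrative sketch (not in the original source): these helpers are mainly
// useful for composing diagnostics about operator dispatch, e.g.:
//
//   LOG(INFO) << "dispatched with mode "
//             << dispatch_mode_string(DispatchMode::kFComputeEx)  // "fcompute_ex"
//             << " on storage " << stype_string(kCSRStorage);     // "csr"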

// heuristic to determine the number of threads per GPU
inline int GetNumThreadPerGPU() {
  // This is a resource-efficient option.
  return dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
}

// heuristic to get the number of matching colors.
// this decides how much parallelism we can get in each GPU.
inline int GetExecNumMatchColor() {
  // This is a resource-efficient option.
  int num_match_color = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
  return std::min(num_match_color, GetNumThreadPerGPU());
}
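
// Note (assumption about typical usage, not from the original source): both
// knobs read environment variables at call time, so they can be tuned without
// recompiling, e.g. `MXNET_GPU_WORKER_NTHREADS=4 python train.py` raises
// GetNumThreadPerGPU() from its default of 2 to 4.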

template<typename T, typename V>
V ParallelAccumulate(const T* a, const int n, V start) {
  V sum = start;
#pragma omp parallel for reduction(+:sum)
  for (int i = 0; i < n; ++i) {
    sum += a[i];
  }
  return sum;
}
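
// Usage sketch (illustrative, not in the original source): reduce an array
// with an OpenMP parallel-for reduction; the accumulator type V may differ
// from the element type T.
//
//   const float data[4] = {1.f, 2.f, 3.f, 4.f};
//   double total = ParallelAccumulate(data, 4, 0.0);  // 10.0, summed in double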

/*!
 * \brief Helper function for ParallelSort.
 *        DO NOT call this function directly.
 *        Use the interface ParallelSort instead.
 */
template<typename RandomIt, typename Compare>
void ParallelSortHelper(RandomIt first, size_t len,
                        size_t grainsize, const Compare& comp) {
  if (len < grainsize) {
    std::sort(first, first+len, comp);
  } else {
    std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
    ParallelSortHelper(first+len/2, len - len/2, grainsize, comp);
    thr.join();
    std::inplace_merge(first, first+len/2, first+len, comp);
  }
}

/*!
 * \brief Sort the elements in the range [first, last) into ascending order.
 *        The elements are compared using comp. Long ranges are recursively
 *        split in half, each half sorted on its own thread, then merged.
 */
template<typename RandomIt, typename Compare>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
  const auto num = std::distance(first, last);
  size_t grainsize = std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
  ParallelSortHelper(first, num, grainsize, comp);
}

/*!
 * \brief Sort the elements in the range [first, last) into ascending order.
 *        The elements are compared using operator<.
 */
template<typename RandomIt>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
  ParallelSort(first, last, num_threads,
               std::less<typename std::iterator_traits<RandomIt>::value_type>());
}
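
// Usage sketch (illustrative, not in the original source): sort a large
// vector with up to 4 worker threads; ranges below the grainsize threshold
// (at least 16K elements) fall back to a plain std::sort.
//
//   std::vector<int64_t> keys = /* many elements */;
//   ParallelSort(keys.begin(), keys.end(), 4);  // ascending via operator<
//   ParallelSort(keys.begin(), keys.end(), 4, std::greater<int64_t>());  // descending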

/*!
 * \brief Random Engine
 */
typedef std::mt19937 RANDOM_ENGINE;

/*!
 * \brief Helper functions for MakeUnique.
 */
namespace helper {

/*!
 * \brief Helper for non-array type `T`.
 */
template <class T>
struct UniqueIf {
  /*!
   * \brief Type of `T`.
   */
  using SingleObject = std::unique_ptr<T>;
};

/*!
 * \brief Helper for an array of unknown bound `T`.
 */
template <class T>
struct UniqueIf<T[]> {
  /*!
   * \brief Type of `T`.
   */
  using UnknownBound = std::unique_ptr<T[]>;
};

/*!
 * \brief Helper for an array of known bound `T`.
 */
template <class T, size_t kSize>
struct UniqueIf<T[kSize]> {
  /*!
   * \brief Type of `T`.
   */
  using KnownBound = void;
};

}  // namespace helper

/*!
 * \brief Constructs an object of type `T` and wraps it in a `std::unique_ptr`.
 * \param args List of arguments with which an instance of `T` will be constructed.
 * \return `std::unique_ptr` of an instance of type `T`.
 *
 * Constructs a non-array type `T`. The arguments `args` are passed to the
 * constructor of `T`. This overload only participates in overload resolution
 * if `T` is not an array type.
 */
template <class T, class... Args>
typename helper::UniqueIf<T>::SingleObject MakeUnique(Args&&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

/*!
 * \brief Constructs an array of type `T` and wraps it in a `std::unique_ptr`.
 * \param n The size of the array to construct.
 * \return `std::unique_ptr` of an instance of type `T`.
 *
 * Constructs an array of unknown bound `T`. This overload only participates
 * in overload resolution if `T` is an array of unknown bound.
 */
template <class T>
typename helper::UniqueIf<T>::UnknownBound MakeUnique(size_t n) {
  using U = typename std::remove_extent<T>::type;
  return std::unique_ptr<T>(new U[n]{});
}

/*!
 * \brief Constructing an array of known bound `T` is disallowed; this
 *        overload is deleted.
 */
template <class T, class... Args>
typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;
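
// Usage sketch (illustrative, not in the original source): MakeUnique mirrors
// C++14 std::make_unique for builds limited to C++11.
//
//   auto msg = MakeUnique<std::string>("hello");  // single object
//   auto buf = MakeUnique<float[]>(256);          // value-initialized array
//   // MakeUnique<float[256]>(...) is deleted: arrays of known bound are rejected.
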
template<typename FCompType>
FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
                      const Context& ctx) {
  static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
  static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");

  if (ctx.dev_mask() == cpu::kDevMask) {
    return fcompute_cpu.get(op, nullptr);
  } else if (ctx.dev_mask() == gpu::kDevMask) {
    return fcompute_gpu.get(op, nullptr);
  } else {
    LOG(FATAL) << "Unknown device mask";
    return nullptr;
  }
}
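
// Usage sketch (illustrative, not in the original source): look up the
// compute function registered for an operator under the given context; the
// operator name "dot" and the `ctx` variable are hypothetical.
//
//   const nnvm::Op* op = nnvm::Op::Get("dot");
//   FCompute fn = GetFCompute<FCompute>(op, "FCompute", ctx);
//   if (fn == nullptr) { /* operator has no kernel for this device */ }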

}  // namespace common
}  // namespace mxnet
#endif  // MXNET_COMMON_UTILS_H_