mxnet
utils.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
25 #ifndef MXNET_COMMON_UTILS_H_
26 #define MXNET_COMMON_UTILS_H_
27 
28 #include <dmlc/logging.h>
29 #include <dmlc/omp.h>
30 #include <nnvm/graph.h>
31 #include <mxnet/engine.h>
32 #include <mxnet/ndarray.h>
33 #include <mxnet/op_attr_types.h>
34 #include <mxnet/graph_attr_types.h>
35 #include <nnvm/graph_attr_types.h>
36 
37 #include <memory>
38 #include <vector>
39 #include <type_traits>
40 #include <utility>
41 #include <random>
42 #include <string>
43 #include <thread>
44 #include <algorithm>
45 #include <functional>
46 
47 #include "../operator/mxnet_op.h"
48 
49 namespace mxnet {
50 namespace common {
51 
57  template<typename DType, typename IType>
58  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indptr,
59  const nnvm::dim_t end, const nnvm::dim_t idx_size) {
60  if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
61  (i == 0 && indptr[i] != 0) ||
62  (i == end - 1 && indptr[end] != idx_size))
63  *out = kCSRIndPtrErr;
64  }
65 };
66 
71 struct csr_idx_check {
72  template<typename DType, typename IType, typename RType>
73  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
74  const RType* indptr, const nnvm::dim_t ncols) {
75  for (RType j = indptr[i]; j < indptr[i+1]; j++) {
76  if (idx[j] >= ncols || idx[j] < 0 ||
77  (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
78  *out = kCSRIdxErr;
79  break;
80  }
81  }
82  }
83 };
84 
89 struct rsp_idx_check {
90  template<typename DType, typename IType>
91  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
92  const nnvm::dim_t end, const nnvm::dim_t nrows) {
93  if ((i < end && idx[i+1] <= idx[i])
94  || idx[i] < 0 || idx[i] >= nrows)
95  *out = kRSPIdxErr;
96  }
97 };
98 
/*!
 * \brief Per-device (cpu/gpu) entry point for format checking; implementations
 * are provided elsewhere and forward to CheckFormatImpl.
 */
template<typename xpu>
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check);
102 
/*!
 * \brief Check the validity of CSRNDArray.
 * \param rctx Execution context.
 * \param input Array to be checked.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations,
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kCSRStorage)
    << "CheckFormatCSRImpl is for CSRNDArray";
  const TShape shape = input.shape();
  const TShape idx_shape = input.aux_shape(csr::kIdx);
  const TShape indptr_shape = input.aux_shape(csr::kIndPtr);
  const TShape storage_shape = input.storage_shape();
  // O(1) shape checks: the tensor must be 2-D, all aux/data arrays 1-D,
  // indptr must hold (num_rows + 1) entries, and the indices array must be
  // the same length as the values array.
  if ((shape.ndim() != 2) ||
      (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
      (indptr_shape[0] != shape[0] + 1) ||
      (idx_shape[0] != storage_shape[0])) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kCSRShapeErr;
    });
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
        MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
          mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
          // Scratch NDArray on the compute device holding the error code;
          // initialized to kNormalErr, possibly overwritten by the kernels.
          NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                    rctx.get_ctx(), false, err_cpu.type_flag_);
          TBlob val_xpu = ret_xpu.data();
          Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
          // One thread per row validates the indptr array.
          Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
            input.aux_data(csr::kIndPtr).dptr<RType>(),
            indptr_shape[0] - 1, idx_shape[0]);
          // no need to check indices if indices are empty
          if (idx_shape[0] != 0) {
            // One thread per row validates that row's column indices.
            Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
              input.aux_data(csr::kIdx).dptr<IType>(),
              input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
          }
          // Copy the (possibly updated) error code back to the cpu blob.
          mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                        val_xpu.get<xpu, 1, DType>(s), s);
        });
      });
    });
  }
}
156 
/*!
 * \brief Check the validity of RowSparseNDArray.
 * \param rctx Execution context.
 * \param input Array to be checked.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations,
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kRowSparseStorage)
    << "CheckFormatRSPImpl is for RSPNDArray";
  const TShape idx_shape = input.aux_shape(rowsparse::kIdx);
  // O(1) check: the row-index array must have one entry per stored row.
  if (idx_shape[0] != input.storage_shape()[0]) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kRSPShapeErr;
    });
    return;
  }
  // An empty row_sparse array trivially passes the index checks.
  if (idx_shape[0] == 0) {
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
        mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
        // Scratch NDArray on the compute device holding the error code;
        // initialized to kNormalErr, possibly overwritten by the kernel.
        NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                  rctx.get_ctx(), false, err_cpu.type_flag_);
        TBlob val_xpu = ret_xpu.data();
        Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());

        // One thread per stored row validates the row indices; end is the
        // last position, so idx[i+1] reads inside rsp_idx_check stay in bounds.
        Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
          val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
          idx_shape[0] - 1, input.shape()[0]);
        // Copy the (possibly updated) error code back to the cpu blob.
        mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                      val_xpu.get<xpu, 1, DType>(s), s);
      });
    });
  }
}
200 
201 template<typename xpu>
202 void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
203  const TBlob &err_cpu, const bool full_check) {
204  int stype = input.storage_type();
205  if (stype == kCSRStorage) {
206  CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
207  } else if (stype == kRowSparseStorage) {
208  CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
209  } else if (stype == kDefaultStorage) {
210  // no-op for default storage
211  } else {
212  LOG(FATAL) << "Unknown storage type " << stype;
213  }
214 }
215 
216 
/*!
 * \brief Dispatch a storage-type cast from \p input into \p output;
 * per-device (cpu/gpu) implementations are provided elsewhere.
 */
template<typename xpu>
void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);
219 
223 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
224  const NDArrayStorageType stype) {
225  if (!vstorage.empty()) {
226  for (const auto& i : vstorage) {
227  if (i != stype) return false;
228  }
229  return true;
230  }
231  return false;
232 }
233 
238 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
239  const NDArrayStorageType stype1,
240  const NDArrayStorageType stype2,
241  bool *has_both) {
242  if (has_both) {
243  *has_both = false;
244  }
245  if (!vstorage.empty()) {
246  uint8_t has = 0;
247  for (const auto i : vstorage) {
248  if (i == stype1) {
249  has |= 1;
250  } else if (i == stype2) {
251  has |= 2;
252  } else {
253  return false;
254  }
255  }
256  if (has_both) {
257  *has_both = has == 3;
258  }
259  return true;
260  }
261  return false;
262 }
263 
267 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
268  const NDArrayStorageType stype) {
269  if (!ndarrays.empty()) {
270  for (const auto& nd : ndarrays) {
271  if (nd.storage_type() != stype) {
272  return false;
273  }
274  }
275  return true;
276  }
277  return false;
278 }
279 
283 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
284  const NDArrayStorageType stype1,
285  const NDArrayStorageType stype2,
286  bool *has_both) {
287  if (has_both) {
288  *has_both = false;
289  }
290  if (!ndarrays.empty()) {
291  uint8_t has = 0;
292  for (const auto& nd : ndarrays) {
293  const NDArrayStorageType stype = nd.storage_type();
294  if (stype == stype1) {
295  has |= 1;
296  } else if (stype == stype2) {
297  has |= 2;
298  } else {
299  return false;
300  }
301  }
302  if (has_both) {
303  *has_both = has == 3;
304  }
305  return true;
306  }
307  return false;
308 }
309 
311 inline std::string dispatch_mode_string(const DispatchMode x) {
312  switch (x) {
314  return "fcompute";
316  return "fcompute_ex";
318  return "fcompute_fallback";
320  return "variable";
322  return "undefined";
323  }
324  return "unknown";
325 }
326 
327 
329 inline std::string stype_string(const int x) {
330  switch (x) {
331  case kDefaultStorage:
332  return "default";
333  case kCSRStorage:
334  return "csr";
335  case kRowSparseStorage:
336  return "row_sparse";
337  }
338  return "unknown";
339 }
340 
// heuristic to determine number of threads per GPU
inline int GetNumThreadPerGPU() {
  // This is resource efficient option.
  return dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
}
346 
// heuristic to get number of matching colors.
// this decides how much parallelism we can get in each GPU.
inline int GetExecNumMatchColor() {
  // This is resource efficient option.
  int num_match_color = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
  // never exceed the per-GPU worker thread count
  return std::min(num_match_color, GetNumThreadPerGPU());
}
354 
/*!
 * \brief Sum the n elements of array \p a into the initial value \p start,
 * parallelized across OpenMP threads when OpenMP is enabled.
 */
template<typename T, typename V>
V ParallelAccumulate(const T* a, const int n, V start) {
  V total = start;
#pragma omp parallel for reduction(+:total)
  for (int idx = 0; idx < n; ++idx) {
    total += a[idx];
  }
  return total;
}
364 
/*!
 * \brief Helper function for ParallelSort.
 * DO NOT call this function directly; use the ParallelSort interface.
 * Recursively splits [first, first+len) in half, sorting one half on a new
 * thread, until a chunk is smaller than \p grainsize, then merges in place.
 */
template<typename RandomIt, typename Compare>
void ParallelSortHelper(RandomIt first, size_t len,
                        size_t grainsize, const Compare& comp) {
  if (len < grainsize) {
    // Small enough: sort serially.
    std::sort(first, first + len, comp);
    return;
  }
  const size_t half = len / 2;
  // Left half runs on a fresh thread while this thread handles the right half.
  std::thread worker(ParallelSortHelper<RandomIt, Compare>, first, half, grainsize, comp);
  ParallelSortHelper(first + half, len - half, grainsize, comp);
  worker.join();
  std::inplace_merge(first, first + half, first + len, comp);
}
384 
/*!
 * \brief Sort the elements in the range [first, last) into ascending order
 * defined by the comparator \p comp, using up to \p num_threads threads.
 * \param num_threads Upper bound on worker threads; 0 is treated as 1
 *        (previously caused a division by zero when computing the grain size).
 */
template<typename RandomIt, typename Compare>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
  const auto num = std::distance(first, last);
  // Guard against num_threads == 0: fall back to a single thread.
  const size_t threads = std::max(num_threads, static_cast<size_t>(1));
  // Chunks below the grain size are sorted serially; the floor of 16K
  // elements keeps thread-spawn overhead bounded for small inputs.
  size_t grainsize = std::max(num / threads + 5, static_cast<size_t>(1024*16));
  ParallelSortHelper(first, num, grainsize, comp);
}
400 
/*!
 * \brief Sort the elements in the range [first, last) into ascending order
 * (operator<), using up to \p num_threads threads.
 */
template<typename RandomIt>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
  using value_type = typename std::iterator_traits<RandomIt>::value_type;
  ParallelSort(first, last, num_threads, std::less<value_type>());
}
415 
419 typedef std::mt19937 RANDOM_ENGINE;
420 
/*! \brief Helper functions for MakeUnique (a std::make_unique stand-in). */
namespace helper {

/*! \brief Helper for non-array type T. */
template <class T>
struct UniqueIf {
  /*! \brief Type of T. */
  using SingleObject = std::unique_ptr<T>;
};

/*! \brief Helper for an array of unknown bound T[]. */
template <class T>
struct UniqueIf<T[]> {
  /*! \brief Type of T. */
  using UnknownBound = std::unique_ptr<T[]>;
};

/*! \brief Helper for an array of known bound T[kSize]; void so the matching
 * MakeUnique overload can be declared deleted. */
template <class T, size_t kSize>
struct UniqueIf<T[kSize]> {
  /*! \brief Type of T. */
  using KnownBound = void;
};

} // namespace helper
460 
472 template <class T, class... Args>
474  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
475 }
476 
486 template <class T>
488  using U = typename std::remove_extent<T>::type;
489  return std::unique_ptr<T>(new U[n]{});
490 }
491 
/*! \brief Constructing arrays of known bound T[kSize] is disallowed. */
template <class T, class... Args>
typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;
502 
/*!
 * \brief Look up the compute function of type FCompType registered under
 * attribute "name<cpu>" or "name<gpu>" for operator \p op, selected by the
 * device mask of \p ctx.
 * \return The registered function, or nullptr if none is registered for \p op.
 */
template<typename FCompType>
FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
                      const Context& ctx) {
  // Attribute maps are global registries; fetched once per FCompType/name.
  static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
  static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");

  if (ctx.dev_mask() == cpu::kDevMask) {
    return fcompute_cpu.get(op, nullptr);
  } else if (ctx.dev_mask() == gpu::kDevMask) {
    return fcompute_gpu.get(op, nullptr);
  } else {
    LOG(FATAL) << "Unknown device mask";
    return nullptr;
  }
}
518 
519 } // namespace common
520 } // namespace mxnet
521 #endif // MXNET_COMMON_UTILS_H_
Symbol min(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:2219
Definition: ndarray.h:72
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:73
Definition: ndarray.h:61
NDArrayStorageType
Definition: ndarray.h:59
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray.
Definition: utils.h:112
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:160
Definition: ndarray.h:52
NDArrayStorageType storage_type() const
Definition: ndarray.h:297
Engine that schedules all the operations according to dependency.
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:202
const TShape & storage_shape() const
Definition: ndarray.h:204
namespace of mxnet
Definition: base.h:127
Additional operator attributes beside the ones provided by NNVM.
void KnownBound
Type of T.
Definition: utils.h:456
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort ...
Definition: utils.h:373
int type_flag_
type flag of the tensor blob
Definition: tensor_blob.h:67
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:504
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:356
nnvm::TShape TShape
Shape data structure used to record shape information.
Definition: base.h:137
int GetNumThreadPerGPU()
Definition: utils.h:342
Definition: ndarray.h:70
execution time context. The information needed in runtime for actual execution.
Definition: base.h:253
DispatchMode
the dispatch mode of the operator
Definition: op_attr_types.h:105
Symbol max(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:2182
std::string stype_string(const int x)
get string representation of storage_type
Definition: utils.h:329
Definition: ndarray.h:63
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp...
Definition: utils.h:395
All the possible information needed by Operator.Forward and Backward This is the superset of RunConte...
Definition: op_attr_types.h:66
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for emp...
Definition: utils.h:223
std::mt19937 RANDOM_ENGINE
Random Engine.
Definition: utils.h:419
Indices of RSPNDArray should be non-negative, less than the size of first dimension and in ascending ...
Definition: utils.h:89
Definition: ndarray.h:56
const TShape & shape() const
Definition: ndarray.h:196
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode
Definition: utils.h:311
Helper for non-array type T.
Definition: utils.h:430
Definition: ndarray.h:52
Data structures that can appear in graph attributes.
Definition: ndarray.h:62
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with value equal with si...
Definition: utils.h:56
Definition: ndarray.h:69
std::unique_ptr< T[]> UnknownBound
Type of T.
Definition: utils.h:445
nnvm::Op Op
operator structure from NNVM
Definition: base.h:139
Symbol sort(const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)
Definition: op.h:2439
std::unique_ptr< T > SingleObject
Type of T.
Definition: utils.h:434
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray.
Definition: utils.h:166
int GetExecNumMatchColor()
Definition: utils.h:349
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:91
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&...args)
Constructs an object of type T and wraps it in a std::unique_ptr.
Definition: utils.h:473
Context information about the execution environment.
Definition: base.h:142
Indices should be non-negative, less than the number of columns and in ascending order per row...
Definition: utils.h:71
const TShape & aux_shape(size_t index) const
get the shape of aux_data(index)
Definition: ndarray.h:216
ndarray interface
Definition: ndarray.h:79
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:58
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph.
Definition: graph_attr_types.h:45
tensor blob class that can be used to hold tensor of any dimension, any device and any data type...
Definition: tensor_blob.h:59
Symbol sum(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:1993