utils.h
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#ifndef MXNET_COMMON_UTILS_H_
#define MXNET_COMMON_UTILS_H_

#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <dmlc/thread_local.h>
#include <nnvm/graph.h>
#include <nnvm/node.h>
#include <mxnet/engine.h>
#include <mxnet/ndarray.h>
#include <mxnet/imperative.h>
#include <mxnet/op_attr_types.h>
#include <mxnet/graph_attr_types.h>
#include <nnvm/graph_attr_types.h>

#include <memory>
#include <vector>
#include <type_traits>
#include <utility>
#include <random>
#include <string>
#include <thread>
#include <algorithm>
#include <functional>
#include <limits>
#include <cstring>
#include <sstream>
#include <iterator>
#include <unordered_set>

#include "../operator/mxnet_op.h"
#if MXNET_USE_MKLDNN == 1
#include "../operator/nn/mkldnn/mkldnn_base-inl.h"
#endif

#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
#include <windows.h>
#else
#include <unistd.h>
#endif

namespace mxnet {
namespace common {

#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
inline size_t current_process_id() { return ::GetCurrentProcessId(); }
#else
inline size_t current_process_id() { return getpid(); }
#endif

/*! \brief IndPtr should be non-negative, in non-decreasing order, start with 0
 *         and end with a value equal to the size of indices.
 */
struct csr_indptr_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indptr,
                                  const nnvm::dim_t end, const nnvm::dim_t idx_size) {
    if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
        (i == 0 && indptr[i] != 0) ||
        (i == end - 1 && indptr[end] != idx_size))
      *out = kCSRIndPtrErr;
  }
};

/*! \brief Indices should be non-negative, less than the number of columns
 *         and in ascending order per row.
 */
struct csr_idx_check {
  template<typename DType, typename IType, typename RType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const RType* indptr, const nnvm::dim_t ncols) {
    for (RType j = indptr[i]; j < indptr[i+1]; j++) {
      if (idx[j] >= ncols || idx[j] < 0 ||
          (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
        *out = kCSRIdxErr;
        break;
      }
    }
  }
};

/*! \brief Indices of RSPNDArray should be non-negative, less than the size
 *         of the first dimension and in ascending order.
 */
struct rsp_idx_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const nnvm::dim_t end, const nnvm::dim_t nrows) {
    if ((i < end && idx[i+1] <= idx[i])
        || idx[i] < 0 || idx[i] >= nrows)
      *out = kRSPIdxErr;
  }
};
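
// Editor's note (illustrative, not upstream code): for a 3x4 CSR array with
// rows {[. 1 . 2], [. . . .], [3 . . .]}, the invariants checked above are
//   indptr = {0, 2, 2, 3}   // non-negative, non-decreasing, ends at nnz == 3
//   idx    = {1, 3, 0}      // within [0, ncols), ascending within each row
// For row_sparse, idx lists the retained row ids in strictly ascending order.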

template<typename xpu>
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check);

/*! \brief Check the validity of a CSRNDArray.
 * \param rctx       Execution context.
 * \param input      Input NDArray of CSR storage.
 * \param err_cpu    Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *                   otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kCSRStorage)
      << "CheckFormatCSRImpl is for CSRNDArray";
  const mxnet::TShape shape = input.shape();
  const mxnet::TShape idx_shape = input.aux_shape(csr::kIdx);
  const mxnet::TShape indptr_shape = input.aux_shape(csr::kIndPtr);
  const mxnet::TShape storage_shape = input.storage_shape();
  if ((shape.ndim() != 2) ||
      (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
      (indptr_shape[0] != shape[0] + 1) ||
      (idx_shape[0] != storage_shape[0])) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kCSRShapeErr;
    });
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
        MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
          mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
          NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                    rctx.get_ctx(), false, err_cpu.type_flag_);
          TBlob val_xpu = ret_xpu.data();
          Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
          Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
              input.aux_data(csr::kIndPtr).dptr<RType>(),
              indptr_shape[0] - 1, idx_shape[0]);
          // no need to check indices if indices are empty
          if (idx_shape[0] != 0) {
            Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
                input.aux_data(csr::kIdx).dptr<IType>(),
                input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
          }
          mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                        val_xpu.get<xpu, 1, DType>(s), s);
        });
      });
    });
  }
}

/*! \brief Check the validity of a RowSparseNDArray.
 * \param rctx       Execution context.
 * \param input      Input NDArray of row_sparse storage.
 * \param err_cpu    Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *                   otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kRowSparseStorage)
      << "CheckFormatRSPImpl is for RSPNDArray";
  const mxnet::TShape idx_shape = input.aux_shape(rowsparse::kIdx);
  if (idx_shape[0] != input.storage_shape()[0]) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kRSPShapeErr;
    });
    return;
  }
  if (idx_shape[0] == 0) {
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
        mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
        NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                  rctx.get_ctx(), false, err_cpu.type_flag_);
        TBlob val_xpu = ret_xpu.data();
        Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());

        Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
            val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
            idx_shape[0] - 1, input.shape()[0]);
        mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                      val_xpu.get<xpu, 1, DType>(s), s);
      });
    });
  }
}

template<typename xpu>
void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
                     const TBlob &err_cpu, const bool full_check) {
  int stype = input.storage_type();
  if (stype == kCSRStorage) {
    CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kRowSparseStorage) {
    CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kDefaultStorage) {
    // no-op for default storage
  } else {
    LOG(FATAL) << "Unknown storage type " << stype;
  }
}

/*! \brief Pick rows specified by user input index array from a row sparse
 *         ndarray and save them in the output sparse ndarray.
 */
template<typename xpu>
void SparseRetainOpForwardRspWrapper(mshadow::Stream<xpu> *s,
                                     const NDArray& input_nd,
                                     const TBlob& idx_data,
                                     const OpReqType req,
                                     NDArray* output_nd);

/*! \brief Casts tensor storage type to the new type. */
template<typename xpu>
void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);

/*! \brief Returns true if all storage types in vstorage are the same as the
 *         target stype. False is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype) {
  if (!vstorage.empty()) {
    for (const auto& i : vstorage) {
      if (i != stype) return false;
    }
    return true;
  }
  return false;
}

/*! \brief Returns true if all storage types in vstorage are the same as one of
 *         the target stypes (stype1 or stype2). If so, has_both (when non-null)
 *         reports whether both target stypes actually occur.
 *         False is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (has_both) {
    *has_both = false;
  }
  if (!vstorage.empty()) {
    uint8_t has = 0;
    for (const auto i : vstorage) {
      if (i == stype1) {
        has |= 1;
      } else if (i == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}

/*! \brief Returns true if the storage types of all ndarrays are the same as
 *         the target stype. False is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype) {
  if (!ndarrays.empty()) {
    for (const auto& nd : ndarrays) {
      if (nd.storage_type() != stype) {
        return false;
      }
    }
    return true;
  }
  return false;
}

/*! \brief Returns true if the storage types of all ndarrays are the same as
 *         one of the target stypes (stype1 or stype2). If so, has_both (when
 *         non-null) reports whether both target stypes actually occur.
 *         False is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (has_both) {
    *has_both = false;
  }
  if (!ndarrays.empty()) {
    uint8_t has = 0;
    for (const auto& nd : ndarrays) {
      const NDArrayStorageType stype = nd.storage_type();
      if (stype == stype1) {
        has |= 1;
      } else if (stype == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}

/*! \brief Returns true if the storage type of any array in ndarrays equals the
 *         target stype. False is returned for empty inputs.
 */
inline bool ContainsStorageType(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype) {
  if (!ndarrays.empty()) {
    for (const auto& nd : ndarrays) {
      if (nd.storage_type() == stype) {
        return true;
      }
    }
  }
  return false;
}

/*! \brief Returns true if any storage type in ndstypes equals the target
 *         stype. False is returned for empty inputs.
 */
inline bool ContainsStorageType(const std::vector<int>& ndstypes,
                                const NDArrayStorageType stype) {
  if (!ndstypes.empty()) {
    for (const auto& ndstype : ndstypes) {
      if (ndstype == stype) {
        return true;
      }
    }
  }
  return false;
}
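
// Illustrative usage sketch (editor's addition): these helpers typically back
// storage-type inference decisions. StorageTypeVector is std::vector<int>.
//
//   StorageTypeVector stypes = {kCSRStorage, kCSRStorage};
//   bool all_csr = ContainsOnlyStorage(stypes, kCSRStorage);   // true
//   bool has_both = false;
//   ContainsOnlyStorage(stypes, kDefaultStorage, kCSRStorage, &has_both);
//   // returns true (only the two allowed stypes occur); has_both stays false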

/*! \brief Get string representation of dispatch_mode. */
inline std::string dispatch_mode_string(const DispatchMode x) {
  switch (x) {
    case DispatchMode::kFCompute:
      return "fcompute";
    case DispatchMode::kFComputeEx:
      return "fcompute_ex";
    case DispatchMode::kFComputeFallback:
      return "fcompute_fallback";
    case DispatchMode::kVariable:
      return "variable";
    case DispatchMode::kUndefined:
      return "undefined";
  }
  return "unknown";
}

/*! \brief Get string representation of storage_type. */
inline std::string stype_string(const int x) {
  switch (x) {
    case kDefaultStorage:
      return "default";
    case kCSRStorage:
      return "csr";
    case kRowSparseStorage:
      return "row_sparse";
  }
  return "unknown";
}

/*! \brief Get string representation of device type. */
inline std::string dev_type_string(const int dev_type) {
  switch (dev_type) {
    case Context::kCPU:
      return "cpu";
    case Context::kGPU:
      return "gpu";
    case Context::kCPUPinned:
      return "cpu_pinned";
    case Context::kCPUShared:
      return "cpu_shared";
  }
  return "unknown";
}

inline std::string attr_value_string(const nnvm::NodeAttrs& attrs,
                                     const std::string& attr_name,
                                     std::string default_val = "") {
  if (attrs.dict.find(attr_name) == attrs.dict.end()) {
    return default_val;
  }
  return attrs.dict.at(attr_name);
}

/*! \brief Get string representation of the operator's storage types. */
inline std::string operator_stype_string(const nnvm::NodeAttrs& attrs,
                                         const int dev_mask,
                                         const std::vector<int>& in_attrs,
                                         const std::vector<int>& out_attrs) {
  std::ostringstream os;
  os << "operator = " << attrs.op->name
     << "\ninput storage types = [";
  for (const int attr : in_attrs) {
    os << stype_string(attr) << ", ";
  }
  os << "]\n"
     << "output storage types = [";
  for (const int attr : out_attrs) {
    os << stype_string(attr) << ", ";
  }
  os << "]\n"
     << "params = {";
  for (auto kv : attrs.dict) {
    os << "\"" << kv.first << "\" : " << kv.second << ", ";
  }
  os << "}\n"
     << "context.dev_mask = " << dev_type_string(dev_mask);
  return os.str();
}

/*! \brief Get string representation of the operator. */
inline std::string operator_string(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<NDArray>& inputs,
                                   const std::vector<OpReqType>& req,
                                   const std::vector<NDArray>& outputs) {
  std::string result = "";
  std::vector<int> in_stypes;
  std::vector<int> out_stypes;
  in_stypes.reserve(inputs.size());
  out_stypes.reserve(outputs.size());
  auto xform = [](const NDArray arr) -> int { return arr.storage_type(); };
  std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
  std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
  result += operator_stype_string(attrs, ctx.run_ctx.ctx.dev_mask(), in_stypes, out_stypes);
  return result;
}

/*! \brief Log a message once. Intended for storage fallback warning messages. */
inline void LogOnce(const std::string& message) {
  typedef dmlc::ThreadLocalStore<std::unordered_set<std::string>> LogStore;
  auto log_store = LogStore::Get();
  if (log_store->find(message) == log_store->end()) {
    LOG(INFO) << message;
    log_store->insert(message);
  }
}

/*! \brief Log a storage fallback event. */
inline void LogStorageFallback(const nnvm::NodeAttrs& attrs,
                               const int dev_mask,
                               const std::vector<int>* in_attrs,
                               const std::vector<int>* out_attrs) {
  static bool log = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
  if (!log) return;
  const std::string op_str = operator_stype_string(attrs, dev_mask, *in_attrs, *out_attrs);
  std::ostringstream os;
  const char* warning = "\nThe operator with default storage type will be dispatched "
      "for execution. You're seeing this warning message because the operator above is unable "
      "to process the given ndarrays with the specified storage types, context and parameters. "
      "Temporary dense ndarrays are generated in order to execute the operator. "
      "This does not affect the correctness of the program. "
      "You can set the environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to "
      "0 to suppress this warning.";
  os << "\nStorage type fallback detected:\n" << op_str << warning;
  LogOnce(os.str());
#if MXNET_USE_MKLDNN == 1
  if (!MKLDNNEnvSet()) common::LogOnce("MXNET_MKLDNN_ENABLED flag is off. "
      "You can re-enable it by setting MXNET_MKLDNN_ENABLED=1");
  if (GetMKLDNNCacheSize() != -1) common::LogOnce("MXNET_MKLDNN_CACHE_NUM is set. "
      "It should only be set if your model has variable input shapes, "
      "as the cache size may grow unbounded.");
#endif
}

// Heuristic to determine the number of threads per GPU.
inline int GetNumThreadsPerGPU() {
  // This is the resource-efficient option.
  return dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
}

// Heuristic to get the number of matching colors.
// This decides how much parallelism we can get in each GPU.
inline int GetExecNumMatchColor() {
  // This is the resource-efficient option.
  int num_match_color = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
  return std::min(num_match_color, GetNumThreadsPerGPU());
}

template<typename T, typename V>
V ParallelAccumulate(const T* a, const int n, V start) {
  V sum = start;
#pragma omp parallel for reduction(+:sum)
  for (int i = 0; i < n; ++i) {
    sum += a[i];
  }
  return sum;
}
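
// Illustrative usage sketch (editor's addition): sum a raw buffer with the
// OpenMP reduction above.
//
//   std::vector<float> grads(1 << 20, 1.0f);
//   float total = ParallelAccumulate(grads.data(),
//                                    static_cast<int>(grads.size()), 0.0f);
//   // total == 1048576.0f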

/*!
 * \brief Helper function for ParallelSort.
 *        DO NOT call this function directly.
 *        Use the interface ParallelSort instead.
 */
template<typename RandomIt, typename Compare>
void ParallelSortHelper(RandomIt first, size_t len,
                        size_t grainsize, const Compare& comp) {
  if (len < grainsize) {
    std::sort(first, first+len, comp);
  } else {
    std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
    ParallelSortHelper(first+len/2, len - len/2, grainsize, comp);
    thr.join();
    std::inplace_merge(first, first+len/2, first+len, comp);
  }
}

/*!
 * \brief Sort the elements in the range [first, last) into ascending order
 *        defined by the comparator comp, using num_threads threads.
 */
template<typename RandomIt, typename Compare>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
  const auto num = std::distance(first, last);
  size_t grainsize = std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
  ParallelSortHelper(first, num, grainsize, comp);
}

/*!
 * \brief Sort the elements in the range [first, last) into ascending order,
 *        using std::less as the comparator and num_threads threads.
 */
template<typename RandomIt>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
  ParallelSort(first, last, num_threads,
               std::less<typename std::iterator_traits<RandomIt>::value_type>());
}
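
// Illustrative usage sketch (editor's addition):
//
//   std::vector<int> v = {5, 2, 9, 1};
//   ParallelSort(v.begin(), v.end(), 4);   // v becomes {1, 2, 5, 9}
//
// Ranges shorter than the grain size (at least 16K elements) are sorted on the
// calling thread; larger ranges are split recursively across std::threads and
// merged with std::inplace_merge.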

/*! \brief Random Engine. */
typedef std::mt19937 RANDOM_ENGINE;

/*! \brief Helper functions for MakeUnique. */
namespace helper {

/*! \brief Helper for non-array type T. */
template <class T>
struct UniqueIf {
  /*! \brief Type of T. */
  using SingleObject = std::unique_ptr<T>;
};

/*! \brief Helper for an array of unknown bound T. */
template <class T>
struct UniqueIf<T[]> {
  /*! \brief Type of T. */
  using UnknownBound = std::unique_ptr<T[]>;
};

/*! \brief Helper for an array of known bound T. */
template <class T, size_t kSize>
struct UniqueIf<T[kSize]> {
  /*! \brief Type of T. */
  using KnownBound = void;
};

}  // namespace helper

/*!
 * \brief Constructs an object of type T and wraps it in a std::unique_ptr.
 * \param args List of arguments with which an instance of T is constructed.
 * \return std::unique_ptr of an instance of type T.
 */
template <class T, class... Args>
typename helper::UniqueIf<T>::SingleObject MakeUnique(Args&&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

/*!
 * \brief Constructs an array of unknown bound T, value-initialized, and wraps
 *        it in a std::unique_ptr.
 * \param n The size of the array to construct.
 */
template <class T>
typename helper::UniqueIf<T>::UnknownBound MakeUnique(size_t n) {
  using U = typename std::remove_extent<T>::type;
  return std::unique_ptr<T>(new U[n]{});
}

/*! \brief Constructing arrays of known bound is disallowed. */
template <class T, class... Args>
typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;

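// Illustrative usage sketch (editor's addition): MakeUnique mirrors C++14
// std::make_unique for single objects and arrays of unknown bound.
//
//   auto s   = MakeUnique<std::string>(3, 'x');   // "xxx"
//   auto arr = MakeUnique<int[]>(8);              // eight value-initialized ints
//   // MakeUnique<int[8]>(...) is deliberately deleted above.
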
template<typename FCompType>
FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
                      const Context& ctx) {
  static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
  static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");

  if (ctx.dev_mask() == cpu::kDevMask) {
    return fcompute_cpu.get(op, nullptr);
  } else if (ctx.dev_mask() == gpu::kDevMask) {
    return fcompute_gpu.get(op, nullptr);
  } else {
    LOG(FATAL) << "Unknown device mask " << ctx.dev_mask();
    return nullptr;
  }
}

/*!
 * \brief Return the max integer value representable in the type T without
 *        loss of precision.
 */
template <typename T>
constexpr size_t MaxIntegerValue() {
  return std::is_integral<T>::value ?
    std::numeric_limits<T>::max():
    size_t(2) << (std::numeric_limits<T>::digits - 1);
}

template <>
constexpr size_t MaxIntegerValue<mshadow::half::half_t>() {
  return size_t(2) << 10;
}

template <>
constexpr size_t MaxIntegerValue<mshadow::bfloat::bf16_t>() {
  return size_t(2) << 14;
}
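
// Editor's note (illustrative): for floating-point types the generic formula
// evaluates to 2^digits, e.g. 2^24 for float and 2^53 for double. The half_t
// specialization yields 2^11 == 2048 (10 stored mantissa bits + 1 implicit).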

MSHADOW_XINLINE int ilog2ul(size_t a) {
  int k = 1;
  while (a >>= 1) ++k;
  return k;
}

MSHADOW_XINLINE int ilog2ui(unsigned int a) {
  int k = 1;
  while (a >>= 1) ++k;
  return k;
}
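
// Editor's note (illustrative): both ilog2 helpers return the bit width of a,
// i.e. floor(log2(a)) + 1 for a >= 1:
//   ilog2ui(1) == 1, ilog2ui(2) == 2, ilog2ui(8) == 4, ilog2ui(15) == 4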

/*! \brief Return an NDArray of all zeros. */
inline NDArray InitZeros(const NDArrayStorageType stype, const mxnet::TShape &shape,
                         const Context &ctx, const int dtype) {
  // NDArray with default storage
  if (stype == kDefaultStorage) {
    NDArray ret(shape, ctx, false, dtype);
    ret = 0;
    return ret;
  }
  // NDArray with non-default storage. Storage allocation is always delayed.
  return NDArray(stype, shape, ctx, true, dtype);
}

/*! \brief Helper to add an NDArray of zeros to a std::vector. */
inline void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape &shape,
                             const Context &ctx, const int dtype,
                             std::vector<NDArray> *vec) {
  // NDArray with default storage
  if (stype == kDefaultStorage) {
    vec->emplace_back(shape, ctx, false, dtype);
    vec->back() = 0;
  } else {
    // NDArray with non-default storage. Storage allocation is always delayed.
    vec->emplace_back(stype, shape, ctx, true, dtype);
  }
}
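
// Illustrative usage sketch (editor's addition):
//
//   NDArray dense = InitZeros(kDefaultStorage, mxnet::TShape({2, 3}),
//                             Context::CPU(), mshadow::kFloat32);
//   std::vector<NDArray> buffers;
//   EmplaceBackZeros(kRowSparseStorage, mxnet::TShape({4, 5}),
//                    Context::CPU(), mshadow::kFloat32, &buffers);
//
// Sparse "zeros" stay unallocated until first written, so they cost no storage.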

/*! \brief Parallelize copy with OpenMP. */
template<typename DType>
inline void ParallelCopy(DType* dst, const DType* src, index_t size) {
  static index_t copy_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000);
  if (size >= copy_block_size) {
    #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
    for (index_t i = 0; i < size; ++i) {
      dst[i] = src[i];
    }
  } else {
    std::memcpy(dst, src, sizeof(DType) * size);
  }
}

/*! \brief Elementwise add, parallelized with OpenMP for large arrays. */
template<typename DType>
inline void ParallelAdd(DType* dst, const DType* src, index_t size) {
  static index_t add_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000);
  if (size >= add_block_size) {
    #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
    for (index_t i = 0; i < size; ++i) {
      dst[i] += src[i];
    }
  } else {
    for (index_t i = 0; i < size; ++i) {
      dst[i] += src[i];
    }
  }
}
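
// Editor's note (illustrative): both helpers only spawn OpenMP threads above
// the MXNET_CPU_PARALLEL_SIZE threshold (200000 elements by default); below
// it, thread startup overhead would dominate the copy or add itself.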

/*!
 * \brief If numpy compatibility is turned off (default), shapes passed in by
 * users follow the legacy shape definition:
 *   1. 0 ndim means the shape is completely unknown.
 *   2. 0 dim size means the dim size is unknown.
 * This converts such shapes to the numpy shape definition:
 *   1. 0 ndim means a scalar tensor.
 *   2. -1 ndim means the shape is unknown.
 *   3. 0 dim size means no elements in that dimension.
 *   4. -1 dim size means the dim size is unknown.
 * so that the operators' infer-shape functions can work in the backend.
 * \param shape The shape to be converted.
 */
inline void ConvertToNumpyShape(mxnet::TShape* shape) {
  if (shape->ndim() == 0) {  // legacy shape ndim == 0 means unknown
    *shape = mxnet::TShape();  // unknown shape ndim == -1
  } else {
    for (int j = 0; j < shape->ndim(); ++j) {
      if ((*shape)[j] == 0) {  // legacy shape dim size == 0 means unknown
        (*shape)[j] = -1;  // unknown dim size == -1
      }
    }
  }
}

inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) {
  for (size_t i = 0; i < shapes->size(); ++i) {
    ConvertToNumpyShape(&(shapes->at(i)));
  }
}
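
// Editor's note (illustrative): legacy -> numpy encoding of unknowns:
//   legacy dim size 0 -> numpy dim size -1   (unknown dimension)
//   legacy ndim 0     -> numpy ndim -1       (completely unknown shape)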

/*!
 * \brief This function is used to convert shapes returned by the infer-shape
 * functions/passes back to the legacy shape definition.
 * \param shape The shape to be converted.
 */
inline void ConvertToLegacyShape(mxnet::TShape* shape) {
  if (!mxnet::ndim_is_known(*shape)) {
    *shape = mxnet::TShape(0, -1);
  } else {
    for (int j = 0; j < shape->ndim(); ++j) {
      if (!mxnet::dim_size_is_known(*shape, j)) {
        (*shape)[j] = 0;
      }
    }
  }
}

inline void ConvertToLegacyShape(mxnet::ShapeVector* shapes) {
  for (size_t i = 0; i < shapes->size(); ++i) {
    ConvertToLegacyShape(&(shapes->at(i)));
  }
}

void ExecuteMonInputCallback(
    const nnvm::IndexedGraph &idx, const std::vector<NDArray *> &state_arrays,
    size_t nid, const std::function<void(const char *, const char *, void *)>
                    &monitor_callback);

void ExecuteMonOutputCallback(
    const nnvm::IndexedGraph &idx, const std::vector<NDArray *> &state_arrays,
    size_t nid, const std::function<void(const char *, const char *, void *)>
                    &monitor_callback);

/*! \brief Get the output name of a node entry. */
static inline std::string GetOutputName(const nnvm::NodeEntry& e) {
  nnvm::Symbol sym;
  sym.outputs.push_back(e);
  return sym.ListOutputNames()[0];
}

inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {
  // convert negative axes to positive values
  const int ndim = src.ndim();
  mxnet::TShape axes = src;
  for (int i = 0; i < ndim; ++i) {
    if (axes[i] < 0) {
      axes[i] += ndim;
    }
    CHECK(axes[i] >= 0 && axes[i] < ndim) << "axes[" << i << "]="
                                          << axes[i] << " exceeds the range ["
                                          << 0 << ", " << ndim << ")";
  }
  return axes;
}
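
// Illustrative example (editor's addition): CanonicalizeAxes treats src as a
// full permutation, so ndim == src.ndim(); e.g. src = (-1, 0) maps to (1, 0),
// and any entry outside [-ndim, ndim) trips the CHECK.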

inline bool is_float(const int dtype) {
  return dtype == mshadow::kFloat32 || dtype == mshadow::kFloat64 || dtype == mshadow::kFloat16;
}

inline bool is_int(const int dtype) {
  return dtype == mshadow::kUint8 || dtype == mshadow::kInt8 ||
         dtype == mshadow::kInt32 || dtype == mshadow::kInt64;
}

inline int get_more_precise_type(const int type1, const int type2) {
  if (type1 == type2) return type1;
  if (is_float(type1) && is_float(type2)) {
    if (type1 == mshadow::kFloat64 || type2 == mshadow::kFloat64) {
      return mshadow::kFloat64;
    }
    if (type1 == mshadow::kFloat32 || type2 == mshadow::kFloat32) {
      return mshadow::kFloat32;
    }
    return mshadow::kFloat16;
  } else if (is_float(type1) || is_float(type2)) {
    return is_float(type1) ? type1 : type2;
  }
  if (type1 == mshadow::kInt64 || type2 == mshadow::kInt64) {
    return mshadow::kInt64;
  }
  if (type1 == mshadow::kInt32 || type2 == mshadow::kInt32) {
    return mshadow::kInt32;
  }
  CHECK(!((type1 == mshadow::kUint8 && type2 == mshadow::kInt8) ||
          (type1 == mshadow::kInt8 && type2 == mshadow::kUint8)))
      << "mixed uint8 and int8 operands should not reach here";
  if (type1 == mshadow::kUint8 || type2 == mshadow::kUint8) {
    return mshadow::kUint8;
  }
  return mshadow::kInt8;
}

inline int np_binary_out_infer_type(const int type1, const int type2) {
  if ((type1 == mshadow::kUint8 && type2 == mshadow::kInt8) ||
      (type1 == mshadow::kInt8 && type2 == mshadow::kUint8)) {
    return mshadow::kInt32;
  }
  return get_more_precise_type(type1, type2);
}
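
// Editor's note (illustrative): promotions implied by the rules above:
//   get_more_precise_type(kFloat16, kInt64)  -> kFloat16  (floats always win)
//   get_more_precise_type(kInt32, kInt64)    -> kInt64
//   np_binary_out_infer_type(kUint8, kInt8)  -> kInt32    (mixed-sign 8-bit)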

inline int GetDefaultDtype() {
  return Imperative::Get()->is_np_default_dtype() ?
         mshadow::kFloat64 :
         mshadow::kFloat32;
}

inline int GetDefaultDtype(int dtype) {
  if (dtype != -1) return dtype;
  return Imperative::Get()->is_np_default_dtype() ?
         mshadow::kFloat64 :
         mshadow::kFloat32;
}

}  // namespace common
}  // namespace mxnet
#endif  // MXNET_COMMON_UTILS_H_