docs/api/utils_8h_source.html

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 #ifndef MXNET_COMMON_UTILS_H_
 #define MXNET_COMMON_UTILS_H_

 #include <dmlc/logging.h>
 #include <dmlc/omp.h>
 #include <nnvm/graph.h>
 #include <nnvm/node.h>
 #include <mxnet/engine.h>
 #include <mxnet/ndarray.h>
 #include <mxnet/op_attr_types.h>
 #include <mxnet/graph_attr_types.h>
 #include <nnvm/graph_attr_types.h>

 #include <memory>
 #include <vector>
 #include <type_traits>
 #include <utility>
 #include <random>
 #include <string>
 #include <thread>
 #include <algorithm>
 #include <functional>
 #include <limits>

 #include "../operator/mxnet_op.h"
 #if MXNET_USE_MKLDNN == 1
 #include "../operator/nn/mkldnn/mkldnn_base-inl.h"
 #endif

 #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
 #include <windows.h>
 #else
 #include <unistd.h>
 #endif


 namespace mxnet {
 namespace common {

 #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
 /*!
  * \brief Returns the id of the calling process (Windows build, via GetCurrentProcessId).
  */
 inline size_t current_process_id() {
   return static_cast<size_t>(::GetCurrentProcessId());
 }
 #else
 /*!
  * \brief Returns the id of the calling process (non-Windows build, via getpid).
  */
 inline size_t current_process_id() {
   return static_cast<size_t>(getpid());
 }
 #endif

 /*!
  * \brief Kernel that validates one entry of a CSR indptr array.
  *  Writes kCSRIndPtrErr to *out when indptr[i+1] is negative or smaller than
  *  indptr[i], when the first entry is not 0, or when the last entry
  *  (indptr[end]) differs from idx_size.
  */
 struct csr_indptr_check {
   template<typename DType, typename IType>
   MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indptr,
                                   const nnvm::dim_t end, const nnvm::dim_t idx_size) {
     const bool bad_order = indptr[i+1] < 0 || indptr[i+1] < indptr[i];
     const bool bad_first = (i == 0) && (indptr[i] != 0);
     // indptr[end] is only read by the work item handling the last row
     const bool bad_last = (i == end - 1) && (indptr[end] != idx_size);
     if (bad_order || bad_first || bad_last) {
       *out = kCSRIndPtrErr;
     }
   }
 };

 /*!
  * \brief Kernel that validates the column indices of row i of a CSR array.
  *  Writes kCSRIdxErr to *out as soon as an index in [indptr[i], indptr[i+1])
  *  is negative, not smaller than ncols, or not strictly smaller than its
  *  successor within the same row.
  */
 struct csr_idx_check {
   template<typename DType, typename IType, typename RType>
   MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                   const RType* indptr, const nnvm::dim_t ncols) {
     const RType row_end = indptr[i+1];
     for (RType pos = indptr[i]; pos < row_end; pos++) {
       const bool out_of_range = idx[pos] < 0 || idx[pos] >= ncols;
       // the successor is only inspected while it still belongs to this row
       const bool not_ascending = (pos < row_end - 1) && (idx[pos] >= idx[pos+1]);
       if (out_of_range || not_ascending) {
         *out = kCSRIdxErr;
         return;
       }
     }
   }
 };

 /*!
  * \brief Kernel that validates entry i of a row-sparse index array.
  *  Writes kRSPIdxErr to *out when idx[i] is negative, not smaller than nrows,
  *  or (for i < end) not strictly smaller than idx[i+1].
  */
 struct rsp_idx_check {
   template<typename DType, typename IType>
   MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                   const nnvm::dim_t end, const nnvm::dim_t nrows) {
     const bool out_of_range = idx[i] < 0 || idx[i] >= nrows;
     // idx[i+1] is only read while i < end
     const bool not_ascending = (i < end) && (idx[i+1] <= idx[i]);
     if (not_ascending || out_of_range) {
       *out = kRSPIdxErr;
     }
   }
 };

 /*!
  * \brief Declaration of the device-templated format-check wrapper.
  *  Only the prototype is visible in this header; the definition lives elsewhere.
  *  It takes the same argument list as CheckFormatImpl.
  */
 template<typename xpu>
 void CheckFormatWrapper(const RunContext &rctx, const NDArray &input,
                         const TBlob &err_cpu, const bool full_check);

 /*!
  * \brief Check the validity of CSRNDArray.
  *  Shape consistency is always verified; a violation writes kCSRShapeErr into
  *  err_cpu and returns. With full_check set, the indptr and idx contents are
  *  additionally validated by kernels and the resulting code is copied into
  *  err_cpu. If the shapes are consistent and full_check is false, err_cpu is
  *  left untouched.
  * \param rctx execution context supplying the stream and device context
  * \param input the array to check; must have CSR storage (enforced by CHECK_EQ)
  * \param err_cpu blob receiving the error code
  * \param full_check whether to also validate the index contents
  */
 template<typename xpu>
 void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input,
                         const TBlob &err_cpu, const bool full_check) {
   using namespace op::mxnet_op;
   CHECK_EQ(input.storage_type(), kCSRStorage)
           << "CheckFormatCSRImpl is for CSRNDArray";
   const mxnet::TShape shape = input.shape();
   const mxnet::TShape idx_shape = input.aux_shape(csr::kIdx);
   const mxnet::TShape indptr_shape = input.aux_shape(csr::kIndPtr);
   const mxnet::TShape storage_shape = input.storage_shape();
   // Shape contract: 2-D data, 1-D aux/storage arrays, one indptr entry per row
   // plus one, and as many indices as stored values.
   if ((shape.ndim() != 2) ||
       (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
       (indptr_shape[0] != shape[0] + 1) ||
       (idx_shape[0] != storage_shape[0])) {
      MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
        DType* err = err_cpu.dptr<DType>();
        *err = kCSRShapeErr;
      });
      return;
   }
   if (full_check) {
     MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
         MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
           mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
           // one-element scratch array on the run context's device holding the result code
           NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                     rctx.get_ctx(), false, err_cpu.type_flag_);
           TBlob val_xpu = ret_xpu.data();
           // start from kNormalErr; the check kernels overwrite it on a violation
           Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
           Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
             input.aux_data(csr::kIndPtr).dptr<RType>(),
             indptr_shape[0] - 1, idx_shape[0]);
           // no need to check indices if indices are empty
           if (idx_shape[0] != 0) {
             Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
               input.aux_data(csr::kIdx).dptr<IType>(),
               input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
           }
           // hand the result code back through err_cpu
           mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                         val_xpu.get<xpu, 1, DType>(s), s);
         });
       });
     });
   }
 }

 /*!
  * \brief Check the validity of a row-sparse NDArray.
  *  A mismatch between the number of indices and the first storage dimension
  *  writes kRSPShapeErr into err_cpu and returns. An empty index array needs no
  *  further checks. With full_check set, the indices are validated by the
  *  rsp_idx_check kernel and the resulting code is copied into err_cpu.
  * \param rctx execution context supplying the stream and device context
  * \param input the array to check; must have row-sparse storage (enforced by CHECK_EQ)
  * \param err_cpu blob receiving the error code
  * \param full_check whether to also validate the index contents
  */
 template<typename xpu>
 void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input,
                         const TBlob &err_cpu, const bool full_check) {
   using namespace op::mxnet_op;
   CHECK_EQ(input.storage_type(), kRowSparseStorage)
           << "CheckFormatRSPImpl is for RSPNDArray";
   const mxnet::TShape idx_shape = input.aux_shape(rowsparse::kIdx);
   // there must be exactly one index per stored row
   if (idx_shape[0] != input.storage_shape()[0]) {
     MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
       DType* err = err_cpu.dptr<DType>();
       *err = kRSPShapeErr;
     });
     return;
   }
   // nothing to validate when no rows are stored
   if (idx_shape[0] == 0) {
     return;
   }
   if (full_check) {
     MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
         mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
         // one-element scratch array on the run context's device holding the result code
         NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                   rctx.get_ctx(), false, err_cpu.type_flag_);
         TBlob val_xpu = ret_xpu.data();
         // start from kNormalErr; the check kernel overwrites it on a violation
         Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());

         // one work item per index; `end` is the position of the last index
         Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
           val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
           idx_shape[0] - 1, input.shape()[0]);
         // hand the result code back through err_cpu
         mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                       val_xpu.get<xpu, 1, DType>(s), s);
       });
     });
   }
 }

 /*!
  * \brief Dispatches the format check according to the storage type of the input.
  *  CSR and row-sparse arrays go to their dedicated checkers, default storage
  *  is accepted without any check, and any other storage type is fatal.
  */
 template<typename xpu>
 void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
                      const TBlob &err_cpu, const bool full_check) {
   const int stype = input.storage_type();
   switch (stype) {
     case kCSRStorage:
       CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
       break;
     case kRowSparseStorage:
       CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
       break;
     case kDefaultStorage:
       // no-op for default storage
       break;
     default:
       LOG(FATAL) << "Unknown storage type " << stype;
   }
 }

 /*!
  * \brief Pick rows specified by user input index array from a row sparse ndarray
  *  and save them in the output.
  *  Declaration only; the definition is not part of this header.
  */
 template<typename xpu>
 void SparseRetainOpForwardRspWrapper(mshadow::Stream<xpu> *s,
                                      const NDArray& input_nd,
                                      const TBlob& idx_data,
                                      const OpReqType req,
                                      NDArray* output_nd);

 /*!
  * \brief Casts tensor storage type to the new type.
  *  Declaration only; the definition is not part of this header.
  */
 template<typename xpu>
 void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);

 /*!
  * \brief Returns true if every storage type in vstorage equals stype.
  *  An empty vector yields false.
  */
 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                 const NDArrayStorageType stype) {
   if (vstorage.empty()) {
     return false;
   }
   for (const auto& entry : vstorage) {
     if (entry != stype) {
       return false;
     }
   }
   return true;
 }

 /*!
  * \brief Returns true if every storage type in vstorage is stype1 or stype2.
  *  An empty vector yields false.
  * \param has_both optional output; reset to false on entry and, when the
  *  function returns true, set to whether both types were encountered
  */
 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                 const NDArrayStorageType stype1,
                                 const NDArrayStorageType stype2,
                                 bool *has_both) {
   if (has_both != nullptr) {
     *has_both = false;
   }
   if (vstorage.empty()) {
     return false;
   }
   bool seen_first = false;
   bool seen_second = false;
   for (const auto entry : vstorage) {
     if (entry == stype1) {
       seen_first = true;
     } else if (entry == stype2) {
       seen_second = true;
     } else {
       return false;
     }
   }
   if (has_both != nullptr) {
     *has_both = seen_first && seen_second;
   }
   return true;
 }

 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                 const NDArrayStorageType stype) {
   if (!ndarrays.empty()) {
     for (const auto& nd : ndarrays) {
       if (nd.storage_type() != stype) {
         return false;
       }
     }
     return true;
   }
   return false;
 }

 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                 const NDArrayStorageType stype1,
                                 const NDArrayStorageType stype2,
                                 bool *has_both) {
   if (has_both) {
     *has_both = false;
   }
   if (!ndarrays.empty()) {
     uint8_t has = 0;
     for (const auto& nd : ndarrays) {
       const NDArrayStorageType stype = nd.storage_type();
       if (stype == stype1) {
         has |= 1;
       } else if (stype == stype2) {
         has |= 2;
       } else {
         return false;
       }
     }
     if (has_both) {
       *has_both = has == 3;
     }
     return true;
   }
   return false;
 }

 inline bool ContainsStorageType(const std::vector<NDArray>& ndarrays,
                                 const NDArrayStorageType stype) {
   if (!ndarrays.empty()) {
     for (const auto& nd : ndarrays) {
       if (nd.storage_type() == stype) {
         return true;
       }
     }
   }
   return false;
 }

 /*!
  * \brief Returns true if at least one entry of ndstypes equals stype.
  *  An empty vector yields false.
  */
 inline bool ContainsStorageType(const std::vector<int>& ndstypes,
                                 const NDArrayStorageType stype) {
   // iterating an empty vector is a no-op, so no separate emptiness test is needed
   for (const auto& candidate : ndstypes) {
     if (candidate == stype) {
       return true;
     }
   }
   return false;
 }

 /*!
  * \brief Maps an mshadow type flag to the spelling of the corresponding C/C++ type.
  *  An unrecognised flag is reported through LOG(FATAL); should that return,
  *  the result is "unknown".
  */
 inline std::string dtype_string(const int dtype) {
   const char* name = nullptr;
   switch (dtype) {
     case mshadow::kFloat32: name = "float"; break;
     case mshadow::kFloat64: name = "double"; break;
     case mshadow::kFloat16: name = "half"; break;
     case mshadow::kUint8:   name = "unsigned char"; break;
     case mshadow::kInt8:    name = "char"; break;
     case mshadow::kInt32:   name = "int"; break;
     case mshadow::kInt64:   name = "long long"; break;
     case mshadow::kBool:    name = "bool"; break;
     default:
       LOG(FATAL) << "Unknown type enum " << dtype;
   }
   return name != nullptr ? name : "unknown";
 }

 /*!
  * \brief get string representation of dispatch_mode
  *  Values not covered by the switch map to "unknown".
  */
 inline std::string dispatch_mode_string(const DispatchMode x) {
   const char* text = "unknown";
   switch (x) {
     case DispatchMode::kFCompute:
       text = "fcompute";
       break;
     case DispatchMode::kFComputeEx:
       text = "fcompute_ex";
       break;
     case DispatchMode::kFComputeFallback:
       text = "fcompute_fallback";
       break;
     case DispatchMode::kVariable:
       text = "variable";
       break;
     case DispatchMode::kUndefined:
       text = "undefined";
       break;
   }
   return text;
 }


 /*!
  * \brief get string representation of storage_type
  *  Values other than default, csr and row_sparse map to "unknown".
  */
 inline std::string stype_string(const int x) {
   if (x == kDefaultStorage) {
     return "default";
   }
   if (x == kCSRStorage) {
     return "csr";
   }
   if (x == kRowSparseStorage) {
     return "row_sparse";
   }
   return "unknown";
 }

 /*!
  * \brief get string representation of device type
  *  Values not covered by the switch map to "unknown".
  */
 inline std::string dev_type_string(const int dev_type) {
   const char* text = "unknown";
   switch (dev_type) {
     case Context::kCPU:
       text = "cpu";
       break;
     case Context::kGPU:
       text = "gpu";
       break;
     case Context::kCPUPinned:
       text = "cpu_pinned";
       break;
     case Context::kCPUShared:
       text = "cpu_shared";
       break;
   }
   return text;
 }

 /*!
  * \brief get string representation of the operator stypes
  * \param attrs node attributes; the operator name and the parameter dict are printed
  * \param dev_mask device type, rendered through dev_type_string
  * \param in_attrs storage types of the inputs
  * \param out_attrs storage types of the outputs
  * \return multi-line description of operator, storage types, params and device
  */
 inline std::string operator_stype_string(const nnvm::NodeAttrs& attrs,
                                          const int dev_mask,
                                          const std::vector<int>& in_attrs,
                                          const std::vector<int>& out_attrs) {
   std::ostringstream os;
   os << "operator = " << attrs.op->name
      << "\ninput storage types = [";
   for (const int attr : in_attrs) {
     os << stype_string(attr) << ", ";
   }
   os << "]\n"
      << "output storage types = [";
   for (const int attr : out_attrs) {
     os << stype_string(attr) << ", ";
   }
   os << "]\n"
      << "params = {";
   // bind by const reference: iterating by value copies both strings of every entry
   for (const auto& kv : attrs.dict) {
     os << "\"" << kv.first << "\" : " << kv.second << ", ";
   }
   os << "}\n"
      << "context.dev_mask = " << dev_type_string(dev_mask);
   return os.str();
 }

 /*!
  * \brief get string representation of the operator
  *  Collects the storage types of all inputs and outputs and delegates the
  *  formatting to operator_stype_string.
  * \param attrs node attributes of the operator
  * \param ctx operator context; supplies the device mask
  * \param inputs input arrays
  * \param req request types (not used by the current implementation)
  * \param outputs output arrays
  */
 inline std::string operator_string(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<NDArray>& inputs,
                                   const std::vector<OpReqType>& req,
                                   const std::vector<NDArray>& outputs) {
   std::vector<int> in_stypes;
   std::vector<int> out_stypes;
   in_stypes.reserve(inputs.size());
   out_stypes.reserve(outputs.size());
   // take each array by const reference; a by-value parameter copied every NDArray
   auto xform = [](const NDArray& arr) -> int { return arr.storage_type(); };
   std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
   std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
   return operator_stype_string(attrs, ctx.run_ctx.ctx.dev_mask(), in_stypes, out_stypes);
 }

 /*!
  * \brief log message once. Intended for storage fallback warning messages.
  *  Already-logged messages are remembered in a set obtained from
  *  dmlc::ThreadLocalStore, so the "once" guarantee holds per store instance.
  */
 inline void LogOnce(const std::string& message) {
   using LogStore = dmlc::ThreadLocalStore<std::unordered_set<std::string>>;
   auto seen_messages = LogStore::Get();
   if (seen_messages->count(message) != 0) {
     return;
   }
   LOG(INFO) << message;
   seen_messages->insert(message);
 }

 /*!
  * \brief Logs (once per distinct message) that an operator falls back to default storage.
  *  Logging can be disabled by setting the environment variable
  *  MXNET_STORAGE_FALLBACK_LOG_VERBOSE to 0; the variable is read only on the
  *  first call. When built with MKLDNN, additional hints about the MKLDNN
  *  environment flags are logged.
  * \param attrs node attributes of the operator
  * \param dev_mask device type of the execution context
  * \param in_attrs storage types of the inputs (dereferenced, must not be null)
  * \param out_attrs storage types of the outputs (dereferenced, must not be null)
  */
 inline void LogStorageFallback(const nnvm::NodeAttrs& attrs,
                                const int dev_mask,
                                const std::vector<int>* in_attrs,
                                const std::vector<int>* out_attrs) {
   static bool log = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
   if (!log) return;
   const std::string op_str = operator_stype_string(attrs, dev_mask, *in_attrs, *out_attrs);
   std::ostringstream os;
   const char* warning = "\nThe operator with default storage type will be dispatched "
     "for execution. You're seeing this warning message because the operator above is unable "
     "to process the given ndarrays with specified storage types, context and parameter. "
     "Temporary dense ndarrays are generated in order to execute the operator. "
     "This does not affect the correctness of the programme. "
     "You can set environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to "
     "0 to suppress this warning.";
   os << "\nStorage type fallback detected:\n" << op_str << warning;
   LogOnce(os.str());
 #if MXNET_USE_MKLDNN == 1
   if (!MKLDNNEnvSet()) common::LogOnce("MXNET_MKLDNN_ENABLED flag is off. "
                                        "You can re-enable by setting MXNET_MKLDNN_ENABLED=1");
   // the first literal ends with a space so the concatenated sentences stay separated
   if (GetMKLDNNCacheSize() != -1) common::LogOnce("MXNET_MKLDNN_CACHE_NUM is set. "
                                        "Should only be set if "
                                        "your model has variable input shapes, "
                                        "as cache size may grow unbounded");
 #endif
 }

 // heuristic to determine the number of worker threads per GPU
 inline int GetNumThreadsPerGPU() {
   // Read from MXNET_GPU_WORKER_NTHREADS; the default of 2 is the resource efficient option.
   const int num_threads = dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
   return num_threads;
 }

 // heuristic to get number of matching colors.
 // this decides how much parallelism we can get in each GPU.
 inline int GetExecNumMatchColor() {
   // Read from MXNET_EXEC_NUM_TEMP (default 1, the resource efficient option)
   // and never exceed the number of worker threads per GPU.
   const int requested = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
   const int thread_cap = GetNumThreadsPerGPU();
   return thread_cap < requested ? thread_cap : requested;
 }

 /*!
  * \brief Sums the first n elements of a on top of start.
  *  The loop carries an OpenMP reduction pragma, so it runs in parallel when
  *  OpenMP is enabled and sequentially otherwise.
  * \param a pointer to the elements
  * \param n number of elements to add
  * \param start initial value of the sum
  * \return start plus the sum of a[0..n)
  */
 template<typename T, typename V>
 V ParallelAccumulate(const T* a, const int n, V start) {
   V total = start;
 #pragma omp parallel for reduction(+:total)
   for (int pos = 0; pos < n; ++pos) {
     total += a[pos];
   }
   return total;
 }

 /*!
  * \brief Helper function for ParallelSort. DO NOT call this function directly.
  *  Use the interface ParallelSort instead.
  *  Ranges shorter than grainsize are sorted with std::sort; longer ranges are
  *  split in half, the first half is sorted on a new thread while the second
  *  half is sorted on the calling thread, and the halves are merged in place.
  * \param first iterator to the first element
  * \param len number of elements to sort
  * \param grainsize length below which the range is sorted sequentially
  * \param comp comparison functor
  */
 template<typename RandomIt, typename Compare>
 void ParallelSortHelper(RandomIt first, size_t len,
                         size_t grainsize, const Compare& comp) {
   if (len < grainsize) {
     std::sort(first, first+len, comp);
   } else {
     std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
     ParallelSortHelper(first+len/2, len - len/2, grainsize, comp);
     thr.join();
     std::inplace_merge(first, first+len/2, first+len, comp);
   }
 }

 /*!
  * \brief Sort the elements in the range [first, last) into the order defined
  *  by the comparator comp, using several threads for large ranges.
  *  The grain size is max(num / num_threads + 5, 16384) elements.
  * \param first iterator to the first element
  * \param last iterator past the last element
  * \param num_threads number of threads the work is divided for; 0 is treated as 1
  * \param comp comparison functor
  */
 template<typename RandomIt, typename Compare>
 void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
   const size_t num = static_cast<size_t>(std::distance(first, last));
   // guard the division below: a thread count of 0 would otherwise divide by zero
   const size_t threads = std::max(num_threads, static_cast<size_t>(1));
   const size_t grainsize = std::max(num / threads + 5, static_cast<size_t>(1024*16));
   ParallelSortHelper(first, num, grainsize, comp);
 }

 /*!
  * \brief Sort the elements in the range [first, last) into ascending order,
  *  i.e. ParallelSort with std::less on the iterator's value type as comparator.
  * \param first iterator to the first element
  * \param last iterator past the last element
  * \param num_threads number of threads the work is divided for
  */
 template<typename RandomIt>
 void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
   typedef typename std::iterator_traits<RandomIt>::value_type ValueType;
   ParallelSort(first, last, num_threads, std::less<ValueType>());
 }

 /*! \brief Random Engine. */
 using RANDOM_ENGINE = std::mt19937;

 namespace helper {

 /*!
  * \brief Helper for non-array type T.
  *  Exposes SingleObject, the unique_ptr type owning a single T.
  */
 template <typename T>
 struct UniqueIf {
   using SingleObject = std::unique_ptr<T>;
 };

 /*!
  * \brief Helper for an array of unknown bound T.
  *  Exposes UnknownBound, the unique_ptr type owning a T[].
  */
 template <typename T>
 struct UniqueIf<T[]> {
   using UnknownBound = std::unique_ptr<T[]>;
 };

 /*!
  * \brief Helper for an array of known bound T.
  *  Exposes KnownBound as void; it only serves to select the deleted overload.
  */
 template <typename T, size_t N>
 struct UniqueIf<T[N]> {
   using KnownBound = void;
 };

 }  // namespace helper

 /*!
  * \brief Constructs an object of type T and wraps it in a std::unique_ptr.
  *  Only participates for non-array T.
  * \param args arguments perfectly forwarded to the constructor of T
  * \return unique_ptr owning the newly created object
  */
 template <class T, class... Args>
 typename helper::UniqueIf<T>::SingleObject MakeUnique(Args&&... args) {
   T* const raw = new T(std::forward<Args>(args)...);
   return std::unique_ptr<T>(raw);
 }

 /*!
  * \brief Allocates a value-initialized array of n elements and wraps it in a
  *  std::unique_ptr. Only participates for array types of unknown bound (T = U[]).
  * \param n number of elements
  * \return unique_ptr owning the newly created array
  */
 template <class T>
 typename helper::UniqueIf<T>::UnknownBound MakeUnique(size_t n) {
   typedef typename std::remove_extent<T>::type Element;
   Element* const raw = new Element[n]{};
   return std::unique_ptr<T>(raw);
 }

 /*!
  * \brief Deleted overload: MakeUnique is not available for arrays of known
  *  bound (T = U[N]).
  */
 template <class T, class... Args>
 typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;

 /*!
  * \brief Looks up the compute function registered for op on the device of ctx.
  *  NOTE: the attribute maps are function-local statics, so for each FCompType
  *  instantiation they are fetched with the `name` of the first call only;
  *  later calls reuse them regardless of the name passed.
  * \param op the operator to look up
  * \param name attribute name; "<cpu>" or "<gpu>" is appended for the lookup
  * \param ctx context whose device mask selects the cpu or gpu attribute
  * \return the registered function, or nullptr if none is registered for op
  */
 template<typename FCompType>
 FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
                       const Context& ctx) {
   static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
   static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");

   const auto dev_mask = ctx.dev_mask();
   if (dev_mask == cpu::kDevMask) {
     return fcompute_cpu.get(op, nullptr);
   }
   if (dev_mask == gpu::kDevMask) {
     return fcompute_gpu.get(op, nullptr);
   }
   LOG(FATAL) << "Unknown device mask " << dev_mask;
   return nullptr;
 }

 /*!
  * \brief Returns the largest value up to which every integer is representable in T.
  *  For integral T this is numeric_limits<T>::max(); otherwise it is
  *  2^digits, where digits is numeric_limits<T>::digits.
  */
 template <typename T>
 constexpr size_t MaxIntegerValue() {
   return !std::is_integral<T>::value ?
     size_t(2) << (std::numeric_limits<T>::digits - 1) :
     std::numeric_limits<T>::max();
 }

 /*!
  * \brief Specialization for mshadow half precision: returns 2^11 = 2048.
  */
 template <>
 constexpr size_t MaxIntegerValue<mshadow::half::half_t>() {
   return size_t(1) << 11;
 }

 /*!
  * \brief Returns 1 plus the number of right shifts needed to reduce a to zero
  *  after the first shift, i.e. the bit width of a for a >= 1 (and 1 for a == 0).
  */
 MSHADOW_XINLINE int ilog2ul(size_t a) {
   int bits = 1;
   for (a >>= 1; a != 0; a >>= 1) {
     ++bits;
   }
   return bits;
 }

 /*!
  * \brief Returns 1 plus the number of right shifts needed to reduce a to zero
  *  after the first shift, i.e. the bit width of a for a >= 1 (and 1 for a == 0).
  */
 MSHADOW_XINLINE int ilog2ui(unsigned int a) {
   int bits = 1;
   for (a >>= 1; a != 0; a >>= 1) {
     ++bits;
   }
   return bits;
 }

 /*!
  * \brief Creates an NDArray of the given storage type, shape, context and dtype.
  *  Default-storage arrays are constructed with the flag `false` and then
  *  assigned 0; other storage types are constructed with the flag `true`
  *  (storage allocation is always delayed) and are not assigned.
  */
 inline NDArray InitZeros(const NDArrayStorageType stype, const mxnet::TShape &shape,
                          const Context &ctx, const int dtype) {
   if (stype != kDefaultStorage) {
     // NDArray with non-default storage. Storage allocation is always delayed.
     return NDArray(stype, shape, ctx, true, dtype);
   }
   // NDArray with default storage
   NDArray dense(shape, ctx, false, dtype);
   dense = 0;
   return dense;
 }

 /*!
  * \brief Appends an NDArray of the given storage type, shape, context and dtype to vec.
  *  Default-storage arrays are constructed in place and then assigned 0; other
  *  storage types are constructed with delayed allocation and are not assigned.
  */
 inline void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape &shape,
                              const Context &ctx, const int dtype,
                              std::vector<NDArray> *vec) {
   if (stype != kDefaultStorage) {
     // NDArray with non-default storage. Storage allocation is always delayed.
     vec->emplace_back(stype, shape, ctx, true, dtype);
     return;
   }
   // NDArray with default storage
   vec->emplace_back(shape, ctx, false, dtype);
   vec->back() = 0;
 }


 template<typename DType>
 inline void ParallelCopy(DType* dst, const DType* src, index_t size) {
   static index_t copy_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000);
   if (size >= copy_block_size) {
     #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
     for (index_t i = 0; i < size; ++i) {
       dst[i] = src[i];
     }
   } else {
     std::memcpy(dst, src, sizeof(DType) * size);
   }
 }

 template<typename DType>
 inline void ParallelAdd(DType* dst, const DType* src, index_t size) {
   static index_t add_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000);
   if (size >= add_block_size) {
     #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
     for (index_t i = 0; i < size; ++i) {
       dst[i] += src[i];
     }
   } else {
     for (index_t i = 0; i < size; ++i) {
       dst[i] += src[i];
     }
   }
 }

 /*!
  * \brief Converts a legacy shape to the numpy-compatible convention in place.
  *  A legacy ndim of 0 (unknown) becomes a default-constructed TShape
  *  (unknown, ndim = -1); otherwise every dim size of 0 (unknown) becomes -1.
  */
 inline void ConvertToNumpyShape(mxnet::TShape* shape) {
   if (shape->ndim() == 0) {  // legacy shape ndim = 0 means unknown
     *shape = mxnet::TShape();  // unknown shape ndim = -1
     return;
   }
   mxnet::TShape& dims = *shape;
   const int ndim = dims.ndim();
   for (int axis = 0; axis < ndim; ++axis) {
     if (dims[axis] == 0) {  // legacy shape dim_size = 0 means unknown
       dims[axis] = -1;  // unknown dim size = -1
     }
   }
 }

 /*!
  * \brief Applies ConvertToNumpyShape to every shape in the vector, in place.
  */
 inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) {
   for (auto& shape : *shapes) {
     ConvertToNumpyShape(&shape);
   }
 }

 /*!
  * \brief Converts a numpy-compatible shape back to the legacy convention in place.
  *  A shape of unknown ndim becomes TShape(0, -1); otherwise every dim whose
  *  size is unknown is set to 0.
  */
 inline void ConvertToLegacyShape(mxnet::TShape* shape) {
   if (!mxnet::ndim_is_known(*shape)) {
     *shape = mxnet::TShape(0, -1);
     return;
   }
   const int ndim = shape->ndim();
   for (int axis = 0; axis < ndim; ++axis) {
     const bool known = mxnet::dim_size_is_known(*shape, axis);
     if (!known) {
       (*shape)[axis] = 0;
     }
   }
 }

 /*!
  * \brief Applies ConvertToLegacyShape to every shape in the vector, in place.
  */
 inline void ConvertToLegacyShape(mxnet::ShapeVector* shapes) {
   for (auto& shape : *shapes) {
     ConvertToLegacyShape(&shape);
   }
 }
 /*!
  * \brief Declaration only; the definition is not part of this header.
  *  NOTE(review): presumably invokes monitor_callback for the input arrays of
  *  node `nid` of the indexed graph -- confirm against the definition.
  */
 void ExecuteMonInputCallback(
     const nnvm::IndexedGraph &idx, const std::vector<NDArray *> &state_arrays,
     size_t nid, const std::function<void(const char *, const char *, void *)>
                     &monitor_callback);

 /*!
  * \brief Declaration only; the definition is not part of this header.
  *  NOTE(review): presumably invokes monitor_callback for the output arrays of
  *  node `nid` of the indexed graph -- confirm against the definition.
  */
 void ExecuteMonOutputCallback(
     const nnvm::IndexedGraph &idx, const std::vector<NDArray *> &state_arrays,
     size_t nid, const std::function<void(const char *, const char *, void *)>
                     &monitor_callback);

 /*!
  * \brief Returns the output name of a node entry, obtained by wrapping the
  *  entry in a temporary Symbol and taking the first name it lists.
  */
 static inline std::string GetOutputName(const nnvm::NodeEntry& e) {
   nnvm::Symbol wrapper;
   wrapper.outputs.push_back(e);
   const std::vector<std::string> names = wrapper.ListOutputNames();
   return names[0];
 }

 /*!
  * \brief Returns a copy of src in which negative axes are made non-negative
  *  by adding src.ndim().
  *  Every resulting axis must lie in [0, ndim); otherwise the CHECK fails.
  * \param src the axes to canonicalize; its ndim is also used as the valid range
  * \return the canonicalized axes
  */
 inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {
   // convert negative axes to positive values
   const int ndim = src.ndim();
   mxnet::TShape axes = src;
   for (int i = 0; i < ndim; ++i) {
     if (axes[i] < 0) {
       axes[i] += ndim;
     }
     // still out of range after the adjustment means the axis was invalid
     CHECK(axes[i] >= 0 && axes[i] < ndim) << "axes[" << i << "]="
                                           << axes[i] << " exceeds the range ["
                                           << 0 << ", " << ndim << ")";
   }
   return axes;
 }

 /*!
  * \brief Returns true if dtype is one of mshadow's kFloat32, kFloat64 or kFloat16.
  */
 inline bool is_float(const int dtype) {
   switch (dtype) {
     case mshadow::kFloat32:
     case mshadow::kFloat64:
     case mshadow::kFloat16:
       return true;
     default:
       return false;
   }
 }

 /*!
  * \brief Returns the more precise of two mshadow type flags.
  *  Equal types are returned unchanged. Among two float types the widest wins
  *  (float64, then float32, then float16). A float type wins over a non-float
  *  type. Among non-float types int64 wins, then int32, then uint8, then int8;
  *  the combination of uint8 with int8 is rejected by a CHECK.
  */
 inline int get_more_precise_type(const int type1, const int type2) {
   if (type1 == type2) return type1;
   const auto either_is = [type1, type2](const int flag) {
     return type1 == flag || type2 == flag;
   };
   const bool first_is_float = is_float(type1);
   const bool second_is_float = is_float(type2);
   if (first_is_float && second_is_float) {
     if (either_is(mshadow::kFloat64)) return mshadow::kFloat64;
     if (either_is(mshadow::kFloat32)) return mshadow::kFloat32;
     return mshadow::kFloat16;
   }
   // exactly one float type: it wins over the non-float one
   if (first_is_float) return type1;
   if (second_is_float) return type2;
   if (either_is(mshadow::kInt64)) return mshadow::kInt64;
   if (either_is(mshadow::kInt32)) return mshadow::kInt32;
   CHECK(!((type1 == mshadow::kUint8 && type2 == mshadow::kInt8) ||
           (type1 == mshadow::kInt8 && type2 == mshadow::kUint8)))
     << "1 is UInt8 and 1 is Int8 should not get here";
   if (either_is(mshadow::kUint8)) return mshadow::kUint8;
   return mshadow::kInt8;
 }

 /*!
  * \brief Infers the output type flag of a binary operation on two type flags.
  *  Mixing uint8 with int8 yields int32; every other combination is delegated
  *  to get_more_precise_type.
  */
 inline int np_binary_out_infer_type(const int type1, const int type2) {
   const bool uint8_then_int8 = type1 == mshadow::kUint8 && type2 == mshadow::kInt8;
   const bool int8_then_uint8 = type1 == mshadow::kInt8 && type2 == mshadow::kUint8;
   if (uint8_then_int8 || int8_then_uint8) {
     return mshadow::kInt32;
   }
   return get_more_precise_type(type1, type2);
 }

 }  // namespace common
 }  // namespace mxnet
 #endif  // MXNET_COMMON_UTILS_H_
mshadow::kFloat32
Definition: base.h:307

mxnet::kRSPIdxErr
Definition: ndarray.h:74

mxnet::common::csr_idx_check::Map
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:90

mxnet::kDefaultStorage
Definition: ndarray.h:63

mxnet::NDArrayStorageType
NDArrayStorageType
Definition: ndarray.h:61

mxnet::common::CheckFormatCSRImpl
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray.
Definition: utils.h:129

mxnet::Context::dev_mask
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:120

mxnet::csr::kIdx
Definition: ndarray.h:54

mxnet::DispatchMode::kVariable

mxnet::NDArray::storage_type
NDArrayStorageType storage_type() const
Definition: ndarray.h:322

node.h
Graph node data structure.

engine.h
Engine that schedules all the operations according to dependency.

mxnet::common::CheckFormatImpl
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:219

mxnet::common::GetNumThreadsPerGPU
int GetNumThreadsPerGPU()
Definition: utils.h:522

mxnet::NDArray::shape
const mxnet::TShape & shape() const
Definition: ndarray.h:222

mxnet::common::SparseRetainOpForwardRspWrapper
void SparseRetainOpForwardRspWrapper(mshadow::Stream< xpu > *s, const NDArray &input_nd, const TBlob &idx_data, const OpReqType req, NDArray *output_nd)
Pick rows specified by user input index array from a row sparse ndarray and save them in the output s...

nnvm::NodeAttrs
The attributes of the current operation node. Usually are additional parameters like axis...
Definition: node.h:120

nnvm::Symbol::outputs
std::vector< NodeEntry > outputs
output entries contained in the symbol
Definition: symbolic.h:74

mxnet::common::ExecuteMonOutputCallback
void ExecuteMonOutputCallback(const nnvm::IndexedGraph &idx, const std::vector< NDArray * > &state_arrays, size_t nid, const std::function< void(const char *, const char *, void *)> &monitor_callback)

mxnet::common::operator_stype_string
std::string operator_stype_string(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > &in_attrs, const std::vector< int > &out_attrs)
get string representation of the operator stypes
Definition: utils.h:439

mxnet
namespace of mxnet
Definition: base.h:89

mxnet::DispatchMode::kFComputeEx

mxnet::common::helper::UniqueIf< T[kSize]>::KnownBound
void KnownBound
Type of T.
Definition: utils.h:636

mxnet::common::ParallelSortHelper
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort ...
Definition: utils.h:553

nnvm::dim_t
int64_t dim_t
data type to store dim size
Definition: tuple.h:39

mxnet::TBlob::type_flag_
int type_flag_
type flag of the tensor blob
Definition: tensor_blob.h:74

mxnet::common::GetFCompute
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:684

mxnet::common::ParallelAccumulate
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:536

mxnet::common::LogOnce
void LogOnce(const std::string &message)
log message once. Intended for storage fallback warning messages.
Definition: utils.h:483

mxnet::RunContext::ctx
Context ctx
base Context
Definition: base.h:352

dmlc::ThreadLocalStore
A threadlocal store to store threadlocal variables. Will return a thread local singleton of type T...
Definition: thread_local.h:35

mxnet::kCSRIdxErr
Definition: ndarray.h:72

mxnet::RunContext
execution time context. The information needed in runtime for actual execution.
Definition: base.h:350

mxnet::DispatchMode
DispatchMode
the dispatch mode of the operator
Definition: op_attr_types.h:122

mxnet::DispatchMode::kFCompute

mxnet::common::stype_string
std::string stype_string(const int x)
get string representation of storage_type
Definition: utils.h:411

mxnet::DispatchMode::kFComputeFallback

mxnet::kCSRStorage
Definition: ndarray.h:65

graph_attr_types.h
Data structures that can appear in graph attributes.

mxnet::common::CastStorageDispatch
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)

mxnet::common::CheckFormatWrapper
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)

mxnet::Context::kCPU
Definition: base.h:105

mxnet::common::ParallelSort
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp...
Definition: utils.h:575

mxnet::OpContext
All the possible information needed by Operator.Forward and Backward This is the superset of RunConte...
Definition: op_attr_types.h:66

mxnet::common::ContainsOnlyStorage
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for emp...
Definition: utils.h:251

mxnet::common::operator_string
std::string operator_string(const nnvm::NodeAttrs &attrs, const OpContext &ctx, const std::vector< NDArray > &inputs, const std::vector< OpReqType > &req, const std::vector< NDArray > &outputs)
get string representation of the operator
Definition: utils.h:465

mshadow::gpu::kDevMask
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:32

nnvm::Symbol::ListOutputNames
std::vector< std::string > ListOutputNames() const
List the names of outputs for this symbol.

mxnet::common::RANDOM_ENGINE
std::mt19937 RANDOM_ENGINE
Random Engine.
Definition: utils.h:599

MSHADOW_XINLINE
#define MSHADOW_XINLINE
Definition: base.h:204

mxnet::ShapeVector
std::vector< mxnet::TShape > ShapeVector
The result holder of shape of each NodeEntry in the graph.
Definition: tuple.h:793

mxnet::common::rsp_idx_check
Indices of RSPNDArray should be non-negative, less than the size of first dimension and in ascending ...
Definition: utils.h:106

nnvm::NodeAttrs::op
const Op * op
The operator this node uses. For place holder variable, op == nullptr.
Definition: node.h:125

mxnet::rowsparse::kIdx
Definition: ndarray.h:58

mxnet::Context::kCPUPinned
Definition: base.h:107

nnvm::IndexedGraph
Auxiliary data structure to index a graph. It maps Nodes in the graph to consecutive integers node_id...
Definition: graph.h:108

mxnet::common::dispatch_mode_string
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode
Definition: utils.h:393

mxnet::common::dev_type_string
std::string dev_type_string(const int dev_type)
get string representation of device type
Definition: utils.h:424

mshadow::kInt8
Definition: base.h:312

mxnet::common::helper::UniqueIf
Helper for non-array type T.
Definition: utils.h:610

mxnet::dim_size_is_known
bool dim_size_is_known(const dim_t dim_size)
Definition: tuple.h:395

mxnet::Context::kGPU
Definition: base.h:106

mxnet::csr::kIndPtr
Definition: ndarray.h:54

mxnet::NDArray::storage_shape
const mxnet::TShape & storage_shape() const
Definition: ndarray.h:230

mxnet::common::ParallelAdd
void ParallelAdd(DType *dst, const DType *src, index_t size)
Definition: utils.h:778

mxnet::common::ExecuteMonInputCallback
void ExecuteMonInputCallback(const nnvm::IndexedGraph &idx, const std::vector< NDArray * > &state_arrays, size_t nid, const std::function< void(const char *, const char *, void *)> &monitor_callback)

mshadow::kBool
Definition: base.h:314

mshadow::kFloat64
Definition: base.h:308

nnvm::Op::name
std::string name
name of the operator
Definition: op.h:107

mxnet::kRowSparseStorage
Definition: ndarray.h:64

nnvm::NodeEntry
an entry that represents output data from a node
Definition: node.h:52

mxnet::common::np_binary_out_infer_type
int np_binary_out_infer_type(const int type1, const int type2)
Definition: utils.h:915

mxnet::common::dtype_string
std::string dtype_string(const int dtype)
Definition: utils.h:368

mxnet::NDArray::aux_shape
const mxnet::TShape & aux_shape(size_t index) const
get the shape of aux_data(index)
Definition: ndarray.h:242

mxnet::common::csr_indptr_check
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with a value equal to the size of indices.
Definition: utils.h:73

mxnet::kCSRIndPtrErr
Definition: ndarray.h:71

mxnet::common::helper::UniqueIf< T[]>::UnknownBound
std::unique_ptr< T[]> UnknownBound
Type of T.
Definition: utils.h:625

mshadow::kInt32
Definition: base.h:311

graph.h
Configuration of nnvm as well as basic data structure.

mxnet::OpReqType
OpReqType
operation request type to Forward and Backward
Definition: op_attr_types.h:45

mshadow::cpu::kDevMask
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:25

mxnet::common::ContainsStorageType
bool ContainsStorageType(const std::vector< NDArray > &ndarrays, const NDArrayStorageType stype)
returns true if storage type of any array in ndarrays is the same as the target stype. false is returned for empty inputs.
Definition: utils.h:341

mxnet::common::MaxIntegerValue
constexpr size_t MaxIntegerValue()
Return the max integer value representable in the type T without loss of precision.
Definition: utils.h:703

mxnet::OpContext::run_ctx
RunContext run_ctx
RunContext related resources.
Definition: op_attr_types.h:72

nnvm::NodeAttrs::dict
std::unordered_map< std::string, std::string > dict
The dictionary representation of attributes.
Definition: node.h:129

mxnet::common::current_process_id
size_t current_process_id()
Definition: utils.h:67

mxnet::common::helper::UniqueIf::SingleObject
std::unique_ptr< T > SingleObject
Type of T.
Definition: utils.h:614

mshadow::kUint8
Definition: base.h:310

mxnet::TShape
A Shape class that is used to represent shape of each tensor.
Definition: tuple.h:413

mxnet::common::CheckFormatRSPImpl
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray.
Definition: utils.h:183

mxnet::common::GetExecNumMatchColor
int GetExecNumMatchColor()
Definition: utils.h:529

omp.h
header to handle OpenMP compatibility issues

mxnet::Tuple::ndim
int ndim() const
Definition: tuple.h:193

MSHADOW_TYPE_SWITCH
#define MSHADOW_TYPE_SWITCH(type, DType,...)
Definition: base.h:991

mxnet::Context::kCPUShared
Definition: base.h:108

mxnet::common::rsp_idx_check::Map
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:108

graph_attr_types.h

mxnet::common::CanonicalizeAxes
mxnet::TShape CanonicalizeAxes(const mxnet::TShape &src)
Definition: utils.h:868

mxnet::ndim_is_known
bool ndim_is_known(const int ndim)
Definition: tuple.h:389

mxnet::common::get_more_precise_type
int get_more_precise_type(const int type1, const int type2)
Definition: utils.h:887

mxnet::index_t
mshadow::index_t index_t
index type usually use unsigned
Definition: base.h:95

mxnet::common::ilog2ul
MSHADOW_XINLINE int ilog2ul(size_t a)
Definition: utils.h:714

mshadow::kFloat16
Definition: base.h:309

mxnet::common::LogStorageFallback
void LogStorageFallback(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > *in_attrs, const std::vector< int > *out_attrs)
log storage fallback event
Definition: utils.h:494

mshadow::kInt64
Definition: base.h:313

mxnet::common::MakeUnique
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&...args)
Constructs an object of type T and wraps it in a std::unique_ptr.
Definition: utils.h:653

mxnet::Context
Context information about the execution environment.
Definition: base.h:102

mxnet::common::csr_idx_check
Indices should be non-negative, less than the number of columns and in ascending order per row.
Definition: utils.h:88

mxnet::common::is_float
bool is_float(const int dtype)
Definition: utils.h:883

ndarray.h

mxnet::DispatchMode::kUndefined

mxnet::NDArray
ndarray interface
Definition: ndarray.h:82

mxnet::common::ParallelCopy
void ParallelCopy(DType *dst, const DType *src, index_t size)
parallelize copy by OpenMP.
Definition: utils.h:762

mxnet::common::InitZeros
NDArray InitZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype)
Return an NDArray of all zeros.
Definition: utils.h:729

mxnet::common::EmplaceBackZeros
void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype, std::vector< NDArray > *vec)
Helper to add a NDArray of zeros to a std::vector.
Definition: utils.h:744

mxnet::common::ilog2ui
MSHADOW_XINLINE int ilog2ui(unsigned int a)
Definition: utils.h:720

mxnet::common::csr_indptr_check::Map
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:75

nnvm::Symbol
Symbol is a helper class used to represent the operator node in Graph.
Definition: symbolic.h:51

mxnet::common::ConvertToLegacyShape
void ConvertToLegacyShape(mxnet::TShape *shape)
This function is used to convert shapes returned by the infer shape functions/pass to the legacy s...
Definition: utils.h:832

mxnet::StorageTypeVector
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph.
Definition: graph_attr_types.h:45

nnvm::Op
Operator structure.
Definition: op.h:104

mxnet::TBlob
tensor blob class that can be used to hold tensor of any dimension, any device and any data type...
Definition: tensor_blob.h:66

mxnet::common::ConvertToNumpyShape
void ConvertToNumpyShape(mxnet::TShape *shape)
If numpy compatibility is turned off (default), the shapes passed in by users follow the legacy shape...
Definition: utils.h:810

mshadow::Stream
computation stream structure, used for asynchronous computations
Definition: tensor.h:365

op_attr_types.h