Go to the documentation of this file.
24 #ifndef MXNET_COMMON_UTILS_H_
25 #define MXNET_COMMON_UTILS_H_
27 #include <dmlc/logging.h>
41 #include <type_traits>
50 #include "../operator/mxnet_op.h"
51 #if MXNET_USE_ONEDNN == 1
52 #include "../operator/nn/dnnl/dnnl_base-inl.h"
55 #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
64 #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
66 return ::GetCurrentProcessId();
78 template <
typename DType,
typename IType>
84 if (indptr[i + 1] < 0 || indptr[i + 1] < indptr[i] || (i == 0 && indptr[i] != 0) ||
85 (i == end - 1 && indptr[end] != idx_size))
95 template <
typename DType,
typename IType,
typename RType>
101 for (RType j = indptr[i]; j < indptr[i + 1]; j++) {
102 if (idx[j] >= ncols || idx[j] < 0 || (j < indptr[i + 1] - 1 && idx[j] >= idx[j + 1])) {
115 template <
typename DType,
typename IType>
121 if ((i < end && idx[i + 1] <= idx[i]) || idx[i] < 0 || idx[i] >= nrows)
126 template <
typename xpu>
129 const TBlob& err_cpu,
130 const bool full_check);
140 template <
typename xpu>
143 const TBlob& err_cpu,
144 const bool full_check) {
145 using namespace op::mxnet_op;
151 if ((shape.
ndim() != 2) ||
152 (idx_shape.
ndim() != 1 || indptr_shape.
ndim() != 1 || storage_shape.
ndim() != 1) ||
153 (indptr_shape[0] != shape[0] + 1) || (idx_shape[0] != storage_shape[0])) {
155 DType* err = err_cpu.dptr<DType>();
162 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
163 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
164 mshadow::Stream<xpu>* s = rctx.get_stream<xpu>();
165 NDArray ret_xpu = NDArray(mshadow::Shape1(1), rctx.get_ctx(), false, err_cpu.type_flag_);
166 TBlob val_xpu = ret_xpu.data();
167 Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
168 Kernel<csr_indptr_check, xpu>::Launch(s,
170 val_xpu.dptr<DType>(),
171 input.aux_data(csr::kIndPtr).dptr<RType>(),
175 if (idx_shape[0] != 0) {
176 Kernel<csr_idx_check, xpu>::Launch(s,
178 val_xpu.dptr<DType>(),
179 input.aux_data(csr::kIdx).dptr<IType>(),
180 input.aux_data(csr::kIndPtr).dptr<RType>(),
183 mshadow::Copy(err_cpu.get<cpu, 1, DType>(), val_xpu.get<xpu, 1, DType>(s), s);
198 template <
typename xpu>
201 const TBlob& err_cpu,
202 const bool full_check) {
203 using namespace op::mxnet_op;
208 DType* err = err_cpu.dptr<DType>();
213 if (idx_shape[0] == 0) {
218 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
219 mshadow::Stream<xpu>* s = rctx.get_stream<xpu>();
220 NDArray ret_xpu = NDArray(mshadow::Shape1(1), rctx.get_ctx(), false, err_cpu.type_flag_);
221 TBlob val_xpu = ret_xpu.data();
222 Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
224 Kernel<rsp_idx_check, xpu>::Launch(s,
226 val_xpu.dptr<DType>(),
227 input.aux_data(rowsparse::kIdx).dptr<IType>(),
230 mshadow::Copy(err_cpu.get<cpu, 1, DType>(), val_xpu.get<xpu, 1, DType>(s), s);
236 template <
typename xpu>
239 const TBlob& err_cpu,
240 const bool full_check) {
243 CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
245 CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
249 LOG(FATAL) <<
"Unknown storage type " << stype;
256 template <
typename xpu>
259 const TBlob& idx_data,
265 template <
typename xpu>
272 if (!vstorage.empty()) {
273 for (
const auto& i : vstorage) {
293 if (!vstorage.empty()) {
295 for (
const auto i : vstorage) {
298 }
else if (i == stype2) {
305 *has_both = has == 3;
317 if (!ndarrays.empty()) {
318 for (
const auto& nd : ndarrays) {
319 if (nd.storage_type() != stype) {
338 if (!ndarrays.empty()) {
340 for (
const auto& nd : ndarrays) {
342 if (stype == stype1) {
344 }
else if (stype == stype2) {
351 *has_both = has == 3;
363 if (!ndarrays.empty()) {
364 for (
const auto& nd : ndarrays) {
365 if (nd.storage_type() == stype) {
377 if (!ndstypes.empty()) {
378 for (
const auto& ndstype : ndstypes) {
379 if (ndstype == stype) {
393 return "fcompute_ex";
395 return "fcompute_fallback";
433 const std::string& attr_name,
434 std::string default_val =
"") {
435 if (attrs.
dict.find(attr_name) == attrs.
dict.end()) {
438 return attrs.
dict.at(attr_name);
442 template <
typename Fn>
444 const auto& found_it = attrs.
dict.find(attr_name);
445 if (found_it != attrs.
dict.end()) {
446 fn(found_it->second);
448 for (
const auto& subgraph : attrs.
subgraphs) {
450 [&](
const nnvm::ObjectPtr& node) { attr_foreach(node->attrs, attr_name, fn); });
454 template <
typename ValueType>
456 static_assert(std::is_integral<ValueType>::value,
"ValueType must be an integral type.");
458 ValueType result = 0;
459 attr_foreach(attrs, attr_name, [&](
const std::string& attr_value) {
460 std::istringstream ss(attr_value);
465 if (ss.fail() || !ss.eof()) {
466 LOG(WARNING) <<
"Incorrect value of an attribute: " << attr_name
467 <<
". Expected an integer, while got: " << attr_value;
476 const std::vector<int>& in_attrs,
477 const std::vector<int>& out_attrs) {
478 std::ostringstream os;
479 os <<
"operator = " << attrs.
op->
name <<
"\ninput storage types = [";
480 for (
const int attr : in_attrs) {
484 <<
"output storage types = [";
485 for (
const int attr : out_attrs) {
490 for (
auto kv : attrs.
dict) {
491 os <<
"\"" << kv.first <<
"\" : " << kv.second <<
", ";
501 const std::vector<NDArray>& inputs,
502 const std::vector<OpReqType>& req,
503 const std::vector<NDArray>& outputs) {
504 std::string result =
"";
505 std::vector<int> in_stypes;
506 std::vector<int> out_stypes;
507 in_stypes.reserve(inputs.size());
508 out_stypes.reserve(outputs.size());
509 auto xform = [](
const NDArray arr) ->
int {
return arr.storage_type(); };
510 std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
511 std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
517 inline void LogOnce(
const std::string& message) {
519 auto log_store = LogStore::Get();
520 if (log_store->find(message) == log_store->end()) {
521 LOG(INFO) << message;
522 log_store->insert(message);
530 const std::vector<int>* in_attrs,
531 const std::vector<int>* out_attrs) {
532 static bool log = dmlc::GetEnv(
"MXNET_STORAGE_FALLBACK_LOG_VERBOSE",
true);
536 std::ostringstream os;
537 const char* warning =
539 "Execution of the operator above will fallback to the generic implementation "
540 #if MXNET_USE_ONEDNN == 1
541 "(not utilizing kernels from oneDNN library) "
543 "with default dense storage type. You are seeing this warning message because "
544 #if MXNET_USE_ONEDNN == 1
545 "MXNET_ONEDNN_ENABLED flag is set to 0, in which case you can re-enable the default "
546 "execution path by setting MXNET_ONEDNN_ENABLED back to 1, or "
548 "the operator above is unable to process the given ndarrays with specified storage types, "
549 "context and/or parameter, in which case temporary dense ndarrays are generated in order to "
550 "execute the operator. The fallback does not affect the correctness of the programme. Using "
551 "default storage type performance degradation might be observed. \nYou can set environment "
552 "variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to 0 to suppress this warning.";
553 os <<
"\nStorage type fallback detected:\n" << op_str << warning;
555 #if MXNET_USE_ONEDNN == 1
556 if (GetDNNLCacheSize() != -1)
558 "MXNET_ONEDNN_CACHE_NUM is set."
559 "Should only be set if "
560 "your model has variable input shapes, "
561 "as cache size may grow unbounded");
568 return dmlc::GetEnv(
"MXNET_GPU_WORKER_NTHREADS", 2);
575 int num_match_color = dmlc::GetEnv(
"MXNET_EXEC_NUM_TEMP", 1);
579 template <
typename T,
typename V>
582 #pragma omp parallel for reduction(+ : sum)
583 for (
int i = 0; i < n; ++i) {
596 template <
typename RandomIt,
typename Compare>
598 if (len < grainsize) {
599 std::sort(first, first + len, comp);
601 std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len / 2, grainsize, comp);
604 std::inplace_merge(first, first + len / 2, first + len, comp);
617 template <
typename RandomIt,
typename Compare>
618 void ParallelSort(RandomIt first, RandomIt last,
size_t num_threads, Compare comp) {
619 const auto num = std::distance(first, last);
620 size_t grainsize = std::max(num / num_threads + 5,
static_cast<size_t>(1024 * 16));
633 template <
typename RandomIt>
636 first, last, num_threads, std::less<
typename std::iterator_traits<RandomIt>::value_type>());
674 template <
class T,
size_t kSize>
695 template <
class T,
class... Args>
697 return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
711 using U =
typename std::remove_extent<T>::type;
712 return std::unique_ptr<T>(
new U[n]{});
723 template <
class T,
class... Args>
724 typename helper::UniqueIf<T>::KnownBound
MakeUnique(Args&&... args) =
delete;
726 template <
typename FCompType>
728 static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name +
"<cpu>");
729 static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name +
"<gpu>");
732 return fcompute_cpu.get(op,
nullptr);
734 return fcompute_gpu.get(op,
nullptr);
736 LOG(FATAL) <<
"Unknown device mask " << ctx.
dev_mask();
744 template <
typename T>
746 return std::is_integral<T>::value ? std::numeric_limits<T>::max() :
747 size_t(2) << (std::numeric_limits<T>::digits - 1);
751 constexpr
size_t MaxIntegerValue<mshadow::half::half_t>() {
752 return size_t(2) << 10;
756 constexpr
size_t MaxIntegerValue<mshadow::bfloat::bf16_t>() {
757 return size_t(2) << 14;
783 NDArray ret(shape, ctx,
false, dtype);
788 return NDArray(stype, shape, ctx,
true, dtype);
798 std::vector<NDArray>* vec) {
801 vec->emplace_back(shape, ctx,
false, dtype);
805 vec->emplace_back(stype, shape, ctx,
true, dtype);
812 template <
typename DType>
814 static index_t copy_block_size = dmlc::GetEnv(
"MXNET_CPU_PARALLEL_SIZE", 200000);
815 if (size >= copy_block_size) {
816 #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
817 for (
index_t i = 0; i < size; ++i) {
821 #pragma GCC diagnostic push
823 #pragma GCC diagnostic ignored "-Wclass-memaccess"
825 std::memcpy(dst, src,
sizeof(DType) * size);
826 #pragma GCC diagnostic pop
833 template <
typename DType>
835 static index_t add_block_size = dmlc::GetEnv(
"MXNET_CPU_PARALLEL_SIZE", 200000);
836 if (size >= add_block_size) {
837 #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
838 for (
index_t i = 0; i < size; ++i) {
842 for (
index_t i = 0; i < size; ++i) {
867 if (shape->
ndim() == 0) {
870 for (
int j = 0; j < shape->
ndim(); ++j) {
871 if ((*shape)[j] == 0) {
879 for (
size_t i = 0; i < shapes->size(); ++i) {
892 for (
int j = 0; j < shape->
ndim(); ++j) {
901 for (
size_t i = 0; i < shapes->size(); ++i) {
907 const std::vector<NDArray*>& state_arrays,
909 const std::function<
void(
const char*,
const char*,
void*)>& monitor_callback);
913 const std::vector<NDArray*>& state_arrays,
915 const std::function<
void(
const char*,
const char*,
void*)>& monitor_callback);
919 const int ndim = src.
ndim();
921 for (
int i = 0; i < ndim; ++i) {
925 CHECK(axes[i] >= 0 && axes[i] < ndim)
926 <<
"axes[" << i <<
"]=" << axes[i] <<
" exceeds the range [" << 0 <<
", " << ndim <<
")";
952 static int bits_of(
const int type_flag) {
955 return sizeof(float) * CHAR_BIT;
957 return sizeof(double) * CHAR_BIT;
959 return sizeof(uint8_t) * CHAR_BIT;
961 return sizeof(int32_t) * CHAR_BIT;
963 return sizeof(int8_t) * CHAR_BIT;
965 return sizeof(int64_t) * CHAR_BIT;
967 return sizeof(bool) * CHAR_BIT;
969 return sizeof(int16_t) * CHAR_BIT;
971 return sizeof(uint16_t) * CHAR_BIT;
973 return sizeof(uint32_t) * CHAR_BIT;
975 return sizeof(uint64_t) * CHAR_BIT;
977 LOG(FATAL) <<
"Unknown type_flag=" << type_flag;
995 return is_float(type1) ? type1 : type2;
1024 if (bits_of(type1) < bits_of(type2)) {
1037 }
else if (bits_of(type2) < bits_of(type1)) {
1070 const std::unordered_map<std::string, std::string>& node_attrs_dict = attrs.
dict;
1071 const std::unordered_map<std::string, std::string>::const_iterator profiler_scope_iter =
1072 node_attrs_dict.find(
"__profiler_scope__");
1073 if (profiler_scope_iter != node_attrs_dict.end()) {
1074 profiler_scope = profiler_scope_iter->second;
1076 return profiler_scope;
1104 *ptr = _aligned_malloc(size, alignment);
1105 if (*ptr ==
nullptr)
1108 int res = posix_memalign(ptr, alignment, size);
1124 return (a + b - 1) / b;
1128 return ((N & (N - 1)) == 0) && N != 0;
1146 #endif // MXNET_COMMON_UTILS_H_
namespace of mxnet
Definition: api_registry.h:33
bool ndim_is_known(const int ndim)
Definition: tuple.h:416
void ExecuteMonOutputCallback(const nnvm::IndexedGraph &idx, const std::vector< NDArray * > &state_arrays, size_t nid, const std::function< void(const char *, const char *, void *)> &monitor_callback)
#define MXNET_STORAGE_DEFAULT_PROFILER_SCOPE_CSTR
Definition: storage.h:34
int size
Definition: utils.h:1091
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:237
ValueType flag_attr_accumulate(const nnvm::NodeAttrs &attrs, const std::string &attr_name)
Definition: utils.h:455
@ kCPU
Definition: base.h:93
Indices should be non-negative, less than the number of columns and in ascending order per row.
Definition: utils.h:94
computation stream structure, used for asynchronous computations
Definition: tensor.h:488
MSHADOW_XINLINE int ilog2ui(unsigned int a)
Definition: utils.h:767
@ kUint16
Definition: base.h:361
bool is_unsigned_int(const int dtype)
Definition: utils.h:947
@ kUint64
Definition: base.h:363
void attr_foreach(const nnvm::NodeAttrs &attrs, const std::string &attr_name, const Fn &fn)
Seeks an attribute in a node and its subgraphs and invokes a function on each.
Definition: utils.h:443
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:727
std::string name
name of the operator
Definition: op.h:108
void LogOnce(const std::string &message)
log message once. Intended for storage fallback warning messages.
Definition: utils.h:517
size_t current_process_id()
Definition: utils.h:69
int GetNumThreadsPerGPU()
Definition: utils.h:566
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:96
@ kDefaultStorage
Definition: ndarray.h:63
@ kCSRIndPtrErr
Definition: ndarray.h:71
OpReqType
operation request type to Forward and Backward
Definition: op_attr_types.h:45
A threadlocal store to store threadlocal variables. Will return a thread local singleton of type T.
Definition: thread_local.h:35
const Op * op
The operator this node uses. For place holder variable, op == nullptr.
Definition: node.h:112
constexpr size_t MaxIntegerValue()
Return the max integer value representable in the type T without loss of precision.
Definition: utils.h:745
@ kInt8
Definition: base.h:357
bool is_signed_int(const int dtype)
Definition: utils.h:942
@ kUint32
Definition: base.h:362
void AlignedMemFree(void *ptr)
Definition: utils.h:1115
DispatchMode
the dispatch mode of the operator
Definition: op_attr_types.h:122
std::string attr_value_string(const nnvm::NodeAttrs &attrs, const std::string &attr_name, std::string default_val="")
Definition: utils.h:432
#define MSHADOW_XINLINE
Definition: base.h:228
void ExecuteMonInputCallback(const nnvm::IndexedGraph &idx, const std::vector< NDArray * > &state_arrays, size_t nid, const std::function< void(const char *, const char *, void *)> &monitor_callback)
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:79
Auxiliary data structure to index a graph. It maps Nodes in the graph to consecutive integers node_id...
Definition: graph.h:108
void ParallelAdd(DType *dst, const DType *src, index_t size)
Definition: utils.h:834
const mxnet::TShape & storage_shape() const
Definition: ndarray.h:252
@ kIndPtr
Definition: ndarray.h:54
void KnownBound
Type of T.
Definition: utils.h:679
NDArray InitZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype)
Return an NDArray of all zeros.
Definition: utils.h:777
execution time context. The information needed in runtime for actual execution.
Definition: base.h:343
void LogStorageFallback(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > *in_attrs, const std::vector< int > *out_attrs)
log storage fallback event
Definition: utils.h:528
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:43
int ndim() const
Definition: tuple.h:217
std::unordered_map< std::string, std::string > dict
The dictionary representation of attributes.
Definition: node.h:116
All the possible information needed by Operator. This is the superset of RunContext....
Definition: op_attr_types.h:66
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph.
Definition: graph_attr_types.h:45
MShadowTypeInfo mshadow_type_info(const int type_flag)
@ kBool
Definition: base.h:359
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp....
Definition: utils.h:618
@ kIdx
Definition: ndarray.h:54
bool ContainsStorageType(const std::vector< NDArray > &ndarrays, const NDArrayStorageType stype)
returns true if storage type of any array in ndarrays is the same as the target stype....
Definition: utils.h:361
std::string operator_string(const nnvm::NodeAttrs &attrs, const OpContext &ctx, const std::vector< NDArray > &inputs, const std::vector< OpReqType > &req, const std::vector< NDArray > &outputs)
get string representation of the operator
Definition: utils.h:499
@ kCPUShared
Definition: base.h:96
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:108
@ kFloat64
Definition: base.h:353
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with value equal with si...
Definition: utils.h:77
const mxnet::TShape & aux_shape(size_t index) const
get the shape of aux_data(index)
Definition: ndarray.h:264
@ kCSRIdxErr
Definition: ndarray.h:72
NDArrayStorageType
Definition: ndarray.h:61
MShadowTypeInfo(const std::string name, const int size)
Definition: utils.h:1097
bool is_np_default_dtype() const
return current numpy default dtype compatibility status.
Definition: imperative.h:234
@ kCPUPinned
Definition: base.h:95
The attributes of the current operation node. Usually are additional parameters like axis,...
Definition: node.h:107
@ kInt16
Definition: base.h:360
Data structures that can appear in graph attributes.
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort ...
Definition: utils.h:597
std::shared_ptr< Node > ObjectPtr
We always use ObjectPtr as the reference pointer to a node, so this alias can be changed if needed.
Definition: node.h:49
int type_flag_
type flag of the tensor blob
Definition: tensor_blob.h:74
int type_promotion(const int type1, const int type2)
Definition: utils.h:983
@ kRSPIdxErr
Definition: ndarray.h:74
std::string name
Definition: utils.h:1090
bool AlignedMemAlloc(void **ptr, size_t size, size_t alignment)
Definition: utils.h:1102
index_t div_round(const index_t a, const index_t b)
Definition: utils.h:1123
std::string operator_stype_string(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > &in_attrs, const std::vector< int > &out_attrs)
get string representation of the operator stypes
Definition: utils.h:474
std::vector< std::shared_ptr< Symbol > > subgraphs
Some operators take graphs as input. These operators include control flow operators and high-order fu...
Definition: node.h:137
ndarray interface
Definition: ndarray.h:82
bool is_int(const int dtype)
Definition: utils.h:936
@ kInt64
Definition: base.h:358
void ConvertToLegacyShape(mxnet::TShape *shape)
This function is used to convert shapes returned by the infer shape functions/pass to the legacy s...
Definition: utils.h:888
header to handle OpenMP compatibility issues
static Imperative * Get()
tensor blob class that can be used to hold tensor of any dimension, any device and any data type,...
Definition: tensor_blob.h:65
const mxnet::TShape & shape() const
Definition: ndarray.h:244
void SparseRetainOpForwardRspWrapper(mshadow::Stream< xpu > *s, const NDArray &input_nd, const TBlob &idx_data, const OpReqType req, NDArray *output_nd)
Pick rows specified by user input index array from a row sparse ndarray and save them in the output s...
@ kInt32
Definition: base.h:356
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
std::unique_ptr< T > SingleObject
Type of T.
Definition: utils.h:657
Indices of RSPNDArray should be non-negative, less than the size of first dimension and in ascending ...
Definition: utils.h:114
@ kGPU
Definition: base.h:94
int64_t dim_t
data type to store dim size
Definition: tuple.h:39
std::unique_ptr< T[]> UnknownBound
Type of T.
Definition: utils.h:668
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:116
mxnet::TShape CanonicalizeAxes(const mxnet::TShape &src)
Definition: utils.h:917
std::string dev_type_string(const int dev_type)
get string representation of device type
Definition: utils.h:418
std::mt19937 RANDOM_ENGINE
Random Engine.
Definition: utils.h:642
std::string dtype_string(const int dtype)
Definition: base.h:1811
Context information about the execution environment.
Definition: base.h:90
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for emp...
Definition: utils.h:271
Additional operator attributes beside the ones provided by NNVM.
Data structures that can appear in graph attributes.
Storage manager across multiple devices.
std::string stype_string(const int x)
get string representation of storage_type
Definition: utils.h:405
void DFSVisit(const std::vector< NodeEntry > &heads, FVisit fvisit)
perform a Post Order DFS visit to each node in the graph. This order is deterministic and is also top...
Definition: graph.h:284
Configuration of nnvm as well as basic data structure.
bool dim_size_is_known(const dim_t dim_size)
Definition: tuple.h:422
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode
Definition: utils.h:388
void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype, std::vector< NDArray > *vec)
Helper to add a NDArray of zeros to a std::vector.
Definition: utils.h:794
size_t RoundToPower2(size_t N)
Definition: utils.h:1131
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:81
@ kCSRStorage
Definition: ndarray.h:65
Definition: optional.h:251
@ kUint8
Definition: base.h:355
void ConvertToNumpyShape(mxnet::TShape *shape)
If numpy compatibility is turned off (default), the shapes passed in by users follow the legacy shape...
Definition: utils.h:866
@ kBfloat16
Definition: base.h:364
std::vector< mxnet::TShape > ShapeVector
The result holder of shape of each NodeEntry in the graph.
Definition: tuple.h:830
@ kIdx
Definition: ndarray.h:58
Context ctx
base Context
Definition: base.h:345
A Shape class that is used to represent shape of each tensor.
Definition: tuple.h:440
Engine that schedules all the operations according to dependency.
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:580
RunContext run_ctx
RunContext related resources.
Definition: op_attr_types.h:72
void ParallelCopy(DType *dst, const DType *src, index_t size)
parallelize copy by OpenMP.
Definition: utils.h:813
NDArray interface that handles array arithmetic.
Graph node data structure.
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:50
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray.
Definition: utils.h:199
Operator structure.
Definition: op.h:105
NDArrayStorageType storage_type() const
Definition: ndarray.h:343
@ kFloat16
Definition: base.h:354
@ kRowSparseStorage
Definition: ndarray.h:64
MShadowTypeInfo(const std::string name, const int size, const int acc_size)
Definition: utils.h:1094
MSHADOW_XINLINE int ilog2ul(size_t a)
Definition: utils.h:760
const std::string NodeAttrsGetProfilerScope(const nnvm::NodeAttrs &attrs)
Definition: utils.h:1067
int GetDefaultDtype()
Definition: utils.h:1079
bool IsPower2(size_t N)
Definition: utils.h:1127
bool is_float(const int dtype)
Definition: utils.h:931
Helper for non-array type T.
Definition: utils.h:653
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&... args)
Constructs an object of type T and wraps it in a std::unique_ptr.
Definition: utils.h:696
int GetExecNumMatchColor()
Definition: utils.h:573
int acc_size
Definition: utils.h:1092
@ kFloat32
Definition: base.h:352
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray.
Definition: utils.h:141
#define MSHADOW_TYPE_SWITCH(type, DType,...)
Definition: base.h:1163