mxnet
exec_utils.h
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file exec_utils.h
 * \brief Common utility functions for executors.
 */
#ifndef MXNET_COMMON_EXEC_UTILS_H_
#define MXNET_COMMON_EXEC_UTILS_H_

#include <nnvm/graph.h>
#include <nnvm/pass_functions.h>
#include <map>
#include <unordered_map>
#include <vector>
#include <string>
#include <utility>
#include "../common/utils.h"
#include "../imperative/exec_pass.h"

namespace mxnet {
namespace common {

#if MXNET_USE_ONEDNN == 1
// We have to make sure it's default storage and default layout.
#define DEFAULT_DATA(x) x.IsDefaultData()
#else
#define DEFAULT_DATA(x) (x.storage_type() == kDefaultStorage)
#endif

/*
 * \brief Setup default-storage tblobs from source NDArrays. If any source NDArray has non-default
 *        storage, it creates a temp NDArray with default storage and uses the temp tblob. The
 *        function also records the indices of non-default source NDArrays and the indices of
 *        their corresponding temporary NDArrays in the temp array.
 * \param src list of source NDArrays
 * \param bufs optional pre-allocated buffers to serve as the temporary default-storage NDArrays
 * \param blobs list of tblobs to return
 * \param temp_src list of source NDArrays which require a temporary default-storage representation
 * \param temp_dst list of temporary destination NDArrays for default-storage representation
 * \param idx_map mapping from indices in source NDArrays to indices in temp_dst. When not set,
 *        indices are not recorded
 * \return true if any source NDArray needs a storage cast
 */
inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
                                const std::vector<NDArray>* bufs,
                                std::vector<TBlob>* blobs,
                                std::vector<NDArray>* temp_src,
                                std::vector<NDArray>* temp_dst,
                                std::unordered_map<uint32_t, uint32_t>* idx_map) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    const auto& nd = src[i];
    if (!DEFAULT_DATA(nd)) {
      (*idx_map)[i] = temp_dst->size();
      NDArray temp =
          bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
#if MXNET_USE_ONEDNN == 1
      CHECK(temp.IsDefaultData());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}

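// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): how an executor might
// stage inputs for storage fallback. `ExampleStageInputs` is a hypothetical
// helper; only the SetupDefaultBlobsIn call mirrors real usage.
inline bool ExampleStageInputs(const std::vector<NDArray>& inputs,
                               std::vector<TBlob>* in_blobs) {
  std::vector<NDArray> pre_temp_src, pre_temp_dst;
  std::unordered_map<uint32_t, uint32_t> in_temp_idx_map;
  // Pass nullptr for bufs so temporaries are allocated on demand.
  const bool needs_cast = SetupDefaultBlobsIn(
      inputs, nullptr, in_blobs, &pre_temp_src, &pre_temp_dst, &in_temp_idx_map);
  // When needs_cast is true, each pre_temp_src[i] still has to be cast into
  // pre_temp_dst[i] (see CastNonDefaultStorage below) before a kernel may
  // read from in_blobs.
  return needs_cast;
}
// ---------------------------------------------------------------------------
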
/*
 * \brief Setup default-storage tblobs for output NDArrays, mirroring SetupDefaultBlobsIn.
 *        In addition, a kWriteInplace request is downgraded to kWriteTo whenever a
 *        temporary output array has to be created, since input and output are then
 *        no longer the same array.
 */
inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
                                 const std::vector<NDArray>* bufs,
                                 std::vector<OpReqType>* req,
                                 std::vector<TBlob>* blobs,
                                 std::vector<NDArray>* temp_src,
                                 std::vector<NDArray>* temp_dst) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    const auto& nd = src[i];

#if MXNET_USE_ONEDNN == 1
    if (req->at(i) == kWriteInplace && nd.IsDNNLData())
      // If it's an inplace write and the output array doesn't use the default
      // layout, we'll generate a temporary output array below, which means
      // the input array and the output array are no longer the same array.
      // We should change the request type accordingly.
      req->at(i) = kWriteTo;
    // We have to make sure it's default storage and default layout.
#endif
    if (!DEFAULT_DATA(nd)) {
#if MXNET_USE_ONEDNN == 1
      NDArray temp;
      if (bufs != nullptr) {
        temp = bufs->at(i);
      } else if (kAddTo == req->at(i)) {
        // kAddTo must accumulate into the existing values, so reorder the
        // original data to default layout instead of allocating fresh memory.
        temp = nd.IsDNNLData() ? nd.Reorder2Default() : nd;
      } else {
        temp = NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
      }
      CHECK(temp.IsDefaultData());
#else
      NDArray temp =
          bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}

/*
 * \brief Setup default-storage tblobs for input and output NDArrays.
 *        If any NDArray has non-default storage, it creates a temp NDArray with
 *        default storage and uses the temp tblob. The function also records the
 *        indices of non-default source NDArrays and the indices of their
 *        corresponding temporary NDArrays in the temp array.
 */
inline void SetupDefaultBlobsInOut(const std::vector<NDArray>& ndinputs,
                                   const std::vector<NDArray>& ndoutputs,
                                   const std::vector<NDArray>* in_bufs,
                                   const std::vector<NDArray>* out_bufs,
                                   std::vector<OpReqType>* req,
                                   std::vector<TBlob>* input_blobs,
                                   std::vector<TBlob>* output_blobs,
                                   std::vector<NDArray>* pre_temp_src,
                                   std::vector<NDArray>* pre_temp_dst,
                                   std::vector<NDArray>* post_temp_src,
                                   std::vector<NDArray>* post_temp_dst,
                                   std::unordered_map<uint32_t, uint32_t>* in_temp_idx_map,
                                   const std::vector<uint32_t>& mutate_idx) {
  // populate input blobs
  SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst, in_temp_idx_map);
  // populate output blobs; note the swapped src/dst: the temporaries written by
  // the kernel become the sources of the post-compute cast back into the
  // original non-default outputs
  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst, post_temp_src);
  // add mutable inputs to the post temp list so in-place mutations are cast back too
  for (const auto idx : mutate_idx) {
    auto map_iter = in_temp_idx_map->find(idx);
    if (map_iter != in_temp_idx_map->end()) {
      post_temp_src->push_back(pre_temp_dst->at(map_iter->second));
      post_temp_dst->push_back(ndinputs[idx]);
    }
  }
}

/*
 * \brief Cast the NDArrays in `src` and store the result in the NDArrays in `dst`.
 *        This is only used for storage fallback in the executor.
 * \param src list of source NDArrays to cast
 * \param dst list of destination NDArrays which hold the result of the cast_storage operation
 * \param ctx operator context for the cast_storage operation
 * \param is_gpu whether the cast runs on GPU
 */
inline void CastNonDefaultStorage(const std::vector<NDArray>& src,
                                  const std::vector<NDArray>& dst,
                                  const OpContext& ctx,
                                  const bool is_gpu) {
  CHECK_EQ(dst.size(), src.size());
  for (size_t i = 0; i < src.size(); i++) {
    if (is_gpu) {
#if MXNET_USE_CUDA
      CastStorageDispatch<gpu>(ctx, src[i], dst[i]);
#else
      LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
    } else {
      CastStorageDispatch<cpu>(ctx, src[i], dst[i]);
    }
  }
}

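// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): the full fallback
// round trip an executor performs around a dense kernel. `ExampleFallbackRun`
// is a hypothetical helper; the staging and cast calls mirror real usage.
inline void ExampleFallbackRun(const std::vector<NDArray>& ndinputs,
                               const std::vector<NDArray>& ndoutputs,
                               std::vector<OpReqType>* req,
                               const OpContext& op_ctx,
                               bool is_gpu) {
  std::vector<TBlob> input_blobs, output_blobs;
  std::vector<NDArray> pre_temp_src, pre_temp_dst, post_temp_src, post_temp_dst;
  std::unordered_map<uint32_t, uint32_t> in_temp_idx_map;
  SetupDefaultBlobsInOut(ndinputs, ndoutputs, nullptr, nullptr, req,
                         &input_blobs, &output_blobs,
                         &pre_temp_src, &pre_temp_dst,
                         &post_temp_src, &post_temp_dst,
                         &in_temp_idx_map, {});
  // cast non-default inputs into their default-storage temporaries
  CastNonDefaultStorage(pre_temp_src, pre_temp_dst, op_ctx, is_gpu);
  // ... run the dense kernel on input_blobs/output_blobs here ...
  // cast the temporaries back into the original non-default outputs
  CastNonDefaultStorage(post_temp_src, post_temp_dst, op_ctx, is_gpu);
}
// ---------------------------------------------------------------------------
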
/*!
 * \brief The default type inference function, which assigns all undefined
 *        types to the same type as one of the defined inputs or outputs.
 */
inline bool SameType(const nnvm::NodeAttrs& attrs,
                     std::vector<int>* iattr,
                     std::vector<int>* oattr) {
  // find the first defined type among outputs, then inputs
  int def_v = -1;
  for (int v : *oattr) {
    if (v != -1) {
      def_v = v;
      break;
    }
  }
  if (def_v == -1) {
    for (int v : *iattr) {
      if (v != -1) {
        def_v = v;
        break;
      }
    }
  }
  if (def_v == -1)
    return false;
  // propagate it to every input and output
  for (int& v : *oattr) {
    v = def_v;
  }
  for (int& v : *iattr) {
    v = def_v;
  }
  return true;
}

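// Illustrative sketch (an assumption, not from this header): SameType matches
// nnvm's FInferType signature, so a hypothetical operator whose inputs and
// outputs must share one dtype could register it directly:
//
//   NNVM_REGISTER_OP(_hypothetical_same_type_op)
//       .set_attr<nnvm::FInferType>("FInferType", mxnet::common::SameType);
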
/*!
 * \brief The default storage type inference function, which assigns all undefined
 *        storage types to kDefaultStorage. If all input and output storage types
 *        are kDefaultStorage, DispatchMode::kFCompute is assigned to dispatch_mode;
 *        otherwise DispatchMode::kFComputeFallback is assigned to dispatch_mode.
 */
inline bool DefaultStorageType(const nnvm::NodeAttrs& attrs,
                               const int dev_mask,
                               DispatchMode* dispatch_mode,
                               std::vector<int>* iattr,
                               std::vector<int>* oattr) {
  bool fallback = false;
  for (int& v : *oattr) {
    if (v == -1)
      v = kDefaultStorage;
    if (v != kDefaultStorage)
      fallback = true;
  }
  for (int& v : *iattr) {
    if (v == -1)
      v = kDefaultStorage;
    if (v != kDefaultStorage)
      fallback = true;
  }
  if (*dispatch_mode == DispatchMode::kUndefined) {
    if (fallback) {
      *dispatch_mode = DispatchMode::kFComputeFallback;
    } else {
      *dispatch_mode = DispatchMode::kFCompute;
    }
  }
  return true;
}

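// Illustrative sketch (an assumption, not from this header): DefaultStorageType
// matches mxnet's FInferStorageType signature, so an operator without sparse
// kernels could register it to get automatic dense fallback:
//
//   NNVM_REGISTER_OP(_hypothetical_dense_only_op)
//       .set_attr<mxnet::FInferStorageType>("FInferStorageType",
//                                           mxnet::common::DefaultStorageType);
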
// string representation of storage id
inline std::string storage_str(int storage_id) {
  std::string str;
  if (storage_id == -1) {
    str = "var (-1)";
  } else if (storage_id == -2) {
    str = "external storage (-2)";
  } else {
    str = "group " + std::to_string(storage_id);
  }
  return str;
}

/* log the static memory plan of the graph. Example:
     node 0 var
     node 1 _copy
                input 0: [80,3,224,224] (47040 KB) -> var storage (-1)
                output 1: [80,3,224,224] (47040 KB) -> group 0
     node 2 var
     node 3 var
     node 4 var
     node 5 var
     node 6 BatchNorm
                input 1: [80,3,224,224] (47040 KB) -> group 0
                input 2: [3] (0 KB) -> var storage (-1)
                input 3: [3] (0 KB) -> var storage (-1)
                input 4: [3] (0 KB) -> var storage (-1)
                input 5: [3] (0 KB) -> var storage (-1)
                output 6: [80,3,224,224] (47040 KB) -> group 1
                output 7: [3] (0 KB) -> group 3
                output 8: [3] (0 KB) -> group 2
     ...
 */
inline void LogMemoryPlan(const nnvm::Graph& g) {
  const auto& idx    = g.indexed_graph();
  const auto& vshape = g.GetAttr<mxnet::ShapeVector>("shape");
  const auto& vtype  = g.GetAttr<nnvm::DTypeVector>("dtype");
  // find node range
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start        = range.first;
    node_end          = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name;
      for (const auto& e : inode.inputs) {
        auto eid          = idx.entry_id(e);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " (" << kilo_bytes << " KB)";
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid      = idx.entry_id(nid, index);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " (" << kilo_bytes << " KB)";
      }
    }
  }
}

/* log the inferred storage types and dispatch modes of the graph. Example:
     node 0 var
     node 1 _copy: fcompute
                input 0: default
                output 1: default
     node 2 var
     node 3 Convolution: fcompute
                input 1: default
                input 2: default
                output 3: default
     node 4 var
     node 5 var
     node 6 var
     node 7 var
     node 8 BatchNorm: fcompute
                input 3: default
                input 4: default
                input 5: default
                input 6: default
                input 7: default
                output 8: default
                output 9: default
                output 10: default
     ...
 */
inline void LogInferStorage(const nnvm::Graph& g) {
  const auto& idx            = g.indexed_graph();
  const auto& vstorage_type  = g.GetAttr<StorageTypeVector>("storage_type");
  const auto& dispatch_modes = g.GetAttr<DispatchModeVector>("dispatch_mode");
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start        = range.first;
    node_end          = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name << ": "
                << dispatch_mode_string(dispatch_modes[nid]);
      for (const auto& e : inode.inputs) {
        auto eid = idx.entry_id(e);
        LOG(INFO) << "\t\tinput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid = idx.entry_id(nid, index);
        LOG(INFO) << "\t\toutput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
    }
  }
}

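// Illustrative usage (an assumption, not from this header): both loggers are
// typically gated behind a verbosity flag inside the executor, e.g.:
//
//   if (log_verbose) {
//     common::LogMemoryPlan(graph);
//     common::LogInferStorage(graph);
//   }
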
/*!
 * \brief If the requested ndarray's shape size is no greater than the
 *        corresponding shared_buffer entry's shape size and the storage type
 *        is shareable, reuse that memory allocation via a reshape; otherwise,
 *        create a zero ndarray. Shareable storages include default storage
 *        and, when enable_row_sparse_sharing is true, row_sparse storage.
 */
inline NDArray ReshapeOrCreate(const std::string& name,
                               const mxnet::TShape& dest_arg_shape,
                               const int dest_arg_dtype,
                               const NDArrayStorageType dest_arg_stype,
                               const Context& ctx,
                               std::unordered_map<std::string, NDArray>* shared_buffer,
                               bool enable_row_sparse_sharing) {
  bool stype_shareable = dest_arg_stype == kDefaultStorage;
  if (enable_row_sparse_sharing) {
    stype_shareable = stype_shareable || dest_arg_stype == kRowSparseStorage;
  }
  auto it = shared_buffer->find(name);
  if (it != shared_buffer->end()) {
    // check if size is large enough for sharing
    bool size_shareable = it->second.shape().Size() >= dest_arg_shape.Size();
    if (size_shareable && stype_shareable) {  // memory can be reused
      CHECK_EQ(it->second.dtype(), dest_arg_dtype)
          << "Requested arg array's dtype does not match that of the reusable ndarray";
      CHECK_EQ(it->second.storage_type(), dest_arg_stype)
          << "Requested arg array's stype does not match that of the reusable ndarray";
      return it->second.Reshape(dest_arg_shape);
    } else if (stype_shareable) {
      LOG(WARNING) << "Bucketing: data " << name << " has a shape " << dest_arg_shape
                   << ", which is larger than the already allocated shape " << it->second.shape()
                   << ". Need to re-allocate. Consider putting default bucket key to be "
                   << "the bucket taking the largest input for better memory sharing.";
      // size is not large enough; create a larger one for sharing
      // the NDArrays in shared_buffer are guaranteed to be of shareable storages
      it->second = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
      return it->second;
    } else {
      // not shareable storage
      return InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
    }
  } else {
    auto ret = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
    if (stype_shareable) {
      shared_buffer->emplace(name, ret);
    }
    return ret;
  }  // if (it != shared_buffer->end())
}

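// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): sharing one buffer
// across two bucketed bind calls. `ExampleBucketing` is a hypothetical helper.
inline void ExampleBucketing(const Context& ctx) {
  std::unordered_map<std::string, NDArray> shared_buffer;
  // First bucket: nothing cached under "data" yet, so a zero ndarray is
  // created and stored in shared_buffer.
  NDArray big = ReshapeOrCreate("data", mxnet::TShape({32, 100}), mshadow::kFloat32,
                                kDefaultStorage, ctx, &shared_buffer, false);
  // Smaller bucket: the cached allocation is large enough, so the same
  // memory is reused via Reshape instead of a fresh allocation.
  NDArray small = ReshapeOrCreate("data", mxnet::TShape({16, 100}), mshadow::kFloat32,
                                  kDefaultStorage, ctx, &shared_buffer, false);
}
// ---------------------------------------------------------------------------
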
/*!
 * \brief Assign context to the graph.
 *        This is triggered by both simple_bind and bind flows.
 */
inline nnvm::Graph AssignContext(nnvm::Graph g,
                                 const Context& default_ctx,
                                 const std::map<std::string, Context>& ctx_map,
                                 const std::vector<Context>& in_arg_ctxes,
                                 const std::vector<Context>& arg_grad_ctxes,
                                 const std::vector<Context>& aux_state_ctxes,
                                 const std::vector<OpReqType>& grad_req_types,
                                 size_t num_forward_inputs,
                                 size_t num_forward_outputs) {
  const auto& idx           = g.indexed_graph();
  const auto& mutable_nodes = idx.mutable_input_nodes();
  // by default, use the default context.
  if (ctx_map.size() == 0) {
    g.attrs["context"] =
        std::make_shared<nnvm::any>(exec::ContextVector(idx.num_nodes(), default_ctx));
    for (const auto& x : in_arg_ctxes) {
      CHECK(x == default_ctx) << "Input array is in " << x << " while binding with ctx="
                              << default_ctx << ". All arguments must be in the global context ("
                              << default_ctx
                              << ") unless group2ctx is specified for a cross-device graph.";
    }
    for (const auto& x : arg_grad_ctxes) {
      CHECK(x == default_ctx) << "Gradient array is in " << x << " while binding with ctx="
                              << default_ctx << ". All gradients must be in the global context ("
                              << default_ctx
                              << ") unless group2ctx is specified for a cross-device graph.";
    }
    return g;
  }

  // otherwise, use context assignment.
  std::map<Context, int> ctx2id;                   // map ctx to device id
  std::vector<Context> ctx_list;                   // index is device id
  nnvm::DeviceVector device(idx.num_nodes(), -1);  // index is node id
  nnvm::DeviceAssignMap device_map;                // map arg name to device id

  // loop through the user-provided ctx_map and
  // populate the maps and lists
  for (auto& kv : ctx_map) {
    if (ctx2id.count(kv.second) == 0) {  // if context has no device id, create one
      ctx2id[kv.second] = static_cast<int>(ctx_list.size());  // assign device id to ctx
      ctx_list.push_back(kv.second);                          // save ctx to the list
    }
    // assign the device id to the arg name with the corresponding ctx
    device_map[kv.first] = ctx2id.at(kv.second);
  }

  // loop through all the remaining input nodes not specified
  // in the ctx_map and populate the maps and lists
  size_t arg_top = 0, aux_top = 0;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    Context ctx;
    if (mutable_nodes.count(nid)) {  // aux node is mutable
      CHECK_LT(aux_top, aux_state_ctxes.size());
      ctx = aux_state_ctxes[aux_top];
      ++aux_top;
    } else {  // regular input node is immutable
      CHECK_LT(arg_top, in_arg_ctxes.size());
      ctx = in_arg_ctxes[arg_top];
      ++arg_top;
    }
    if (ctx2id.count(ctx) == 0) {  // if the current ctx is not in the map of ctx and device id
      ctx2id[ctx] = static_cast<int>(ctx_list.size());  // assign the current ctx a device id
      ctx_list.push_back(ctx);                          // save the current ctx in the list
    }
    device[nid] = ctx2id.at(ctx);  // assign the device id to the current node
  }

  // loop through the backward input nodes and populate the maps and lists;
  // the backward input nodes are the gradients of the loss wrt the outputs
  size_t arg_grad_offset = 0;
  // keep an offset into the arg_grad_ctxes vector,
  // since g.outputs excludes the arg_grads whose req == kNullOp
  CHECK_GE(grad_req_types.size(), g.outputs.size() - num_forward_outputs)
      << "insufficient number of grad_reqs";
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp)
      ++arg_grad_offset;
    const uint32_t nid = idx.outputs()[i].node_id;
    Context ctx        = arg_grad_ctxes[arg_grad_offset];
    if (ctx2id.count(ctx) == 0) {
      ctx2id[ctx] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(ctx);
    }
    int devid = ctx2id.at(ctx);
    if (device[nid] != -1) {
      CHECK_EQ(device[nid], devid) << "devices assigned to the same output node must match";
    } else {
      device[nid] = devid;
    }
  }

  g.attrs["device"] = std::make_shared<dmlc::any>(std::move(device));
  g                 = nnvm::pass::PlaceDevice(g, "__ctx_group__", device_map, "_CrossDeviceCopy");
  const auto& assigned_devices = g.GetAttr<nnvm::DeviceVector>("device");

  exec::ContextVector vcontext;
  for (auto context : assigned_devices) {
    if (context == -1) {
      vcontext.push_back(default_ctx);
    } else {
      vcontext.push_back(ctx_list[context]);
    }
  }

  // after device planning, we should check again that the assigned device
  // of each gradient node matches the storage context of the grads
  auto& new_idx   = g.indexed_graph();
  arg_grad_offset = 0;
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp)
      ++arg_grad_offset;
    const uint32_t nid = new_idx.outputs()[i].node_id;
    Context ctx        = arg_grad_ctxes[arg_grad_offset];
    CHECK(ctx == vcontext[nid]) << "Trying to save gradient to " << ctx
                                << " while its source node \"" << new_idx[nid].source->attrs.name
                                << "\" computes it on " << vcontext[nid]
                                << ". Check your ctx in NDArray allocation.";
  }

  g.attrs["context"] = std::make_shared<nnvm::any>(std::move(vcontext));
  return g;
}

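// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original header): binding without a
// group2ctx map, which takes the first branch above and places every node on
// the default context. `ExampleAssignDefault` is a hypothetical helper.
inline nnvm::Graph ExampleAssignDefault(nnvm::Graph g,
                                        const std::vector<Context>& in_arg_ctxes,
                                        const std::vector<Context>& arg_grad_ctxes,
                                        const std::vector<Context>& aux_state_ctxes,
                                        const std::vector<OpReqType>& grad_req_types,
                                        size_t num_fwd_inputs,
                                        size_t num_fwd_outputs) {
  // An empty ctx_map means no group2ctx was specified; every in_arg and
  // arg_grad context is then required to equal the default context.
  return AssignContext(std::move(g), Context::CPU(), {}, in_arg_ctxes, arg_grad_ctxes,
                       aux_state_ctxes, grad_req_types, num_fwd_inputs, num_fwd_outputs);
}
// ---------------------------------------------------------------------------
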
/*!
 * \brief Copy the graph, optionally leaving the original Variable nodes.
 */
void CopyGraph(nnvm::Graph* dst, const nnvm::Graph& src, bool copy_variables);

/*!
 * \brief Check whether the graph contains any duplicated names in its inputs.
 */
bool CheckForInputNameDuplicates(const nnvm::IndexedGraph& idx);

}  // namespace common
}  // namespace mxnet
#endif  // MXNET_COMMON_EXEC_UTILS_H_