exec_utils.h
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#ifndef MXNET_COMMON_EXEC_UTILS_H_
#define MXNET_COMMON_EXEC_UTILS_H_

#include <nnvm/graph.h>
#include <nnvm/pass_functions.h>
#include <map>
#include <unordered_map>
#include <vector>
#include <string>
#include <utility>
#include <sstream>
#include "../common/utils.h"
#include "../executor/exec_pass.h"

namespace mxnet {
namespace common {

/*
 * \brief Set up default-storage tblobs from source NDArrays. If any source NDArray has non-default
 *        storage, it creates a temp NDArray with default storage and uses the temp tblob. The
 *        function also records the indices of non-default source NDArrays and the indices of
 *        their corresponding temporary NDArrays in the temp array.
 * \param src list of source NDArrays
 * \param bufs optional list of pre-allocated default-storage NDArrays to reuse as temporaries
 * \param blobs list of tblobs to return
 * \param temp_src list of source NDArrays which require a temporary default-storage representation
 * \param temp_dst list of temporary destination NDArrays for default-storage representation
 * \param idx_map mapping from indices in source NDArrays to indices in temp_dst. When not set,
 *        indices are not recorded
 * \return true if any source NDArray needs a storage cast
 */
inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
                                const std::vector<NDArray> *bufs,
                                std::vector<TBlob> *blobs,
                                std::vector<NDArray> *temp_src,
                                std::vector<NDArray> *temp_dst,
                                std::unordered_map<uint32_t, uint32_t> *idx_map) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    auto& nd = src[i];
    bool is_default = nd.storage_type() == kDefaultStorage;
#if MXNET_USE_MKLDNN == 1
    // We have to make sure it's default storage and default layout.
    is_default = nd.IsDefaultData();
#endif
    if (!is_default) {
      (*idx_map)[i] = temp_dst->size();
      NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
                                                             true, nd.dtype());
#if MXNET_USE_MKLDNN == 1
      CHECK(temp.IsDefaultData());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}
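
// Illustrative sketch (not part of this header): how a caller might obtain
// dense TBlobs for a kernel that only supports default storage. The vector
// `inputs` and all other names here are hypothetical.
//
//   std::vector<TBlob> in_blobs;
//   std::vector<NDArray> pre_src, pre_dst;
//   std::unordered_map<uint32_t, uint32_t> idx_map;
//   bool need_cast = SetupDefaultBlobsIn(inputs, nullptr, &in_blobs,
//                                        &pre_src, &pre_dst, &idx_map);
//   // If need_cast is true, each pre_src[k] still has to be cast into
//   // pre_dst[k] (see CastNonDefaultStorage below) before in_blobs is read.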

inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
                                 const std::vector<NDArray> *bufs,
                                 std::vector<OpReqType> *req,
                                 std::vector<TBlob> *blobs,
                                 std::vector<NDArray> *temp_src,
                                 std::vector<NDArray> *temp_dst) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    auto& nd = src[i];
    bool is_default = nd.storage_type() == kDefaultStorage;
#if MXNET_USE_MKLDNN == 1
    if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
      // If it's write inplace and the output array doesn't use the default
      // layout, we'll generate a temporary output array below, which means
      // the input array and the output array are no longer the same array.
      // We should change the request type.
      req->at(i) = kWriteTo;
    // We have to make sure it's default storage and default layout.
    is_default = nd.IsDefaultData();
#endif
    if (!is_default) {
#if MXNET_USE_MKLDNN == 1
      NDArray temp;
      if (bufs != nullptr) {
        temp = bufs->at(i);
      } else if (kAddTo == req->at(i) && nd.IsMKLDNNData()) {
        // kAddTo needs the existing output values, so reorder them to
        // default layout first.
        temp = nd.Reorder2Default();
      } else if (kAddTo == req->at(i)) {
        // Already default layout; accumulate directly into the output array.
        temp = nd;
      } else {
        temp = NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
      }
      CHECK(temp.IsDefaultData());
#else
      NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
                                                             true, nd.dtype());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}

/*
 * \brief Set up default-storage tblobs for input and output NDArrays.
 *        If any NDArray has non-default storage, it creates a temp NDArray
 *        with default storage and uses the temp tblob. The function also
 *        records the indices of non-default source NDArrays and the indices
 *        of their corresponding temporary NDArrays in the temp array.
 */
inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
                                   const std::vector<NDArray> &ndoutputs,
                                   const std::vector<NDArray> *in_bufs,
                                   const std::vector<NDArray> *out_bufs,
                                   std::vector<OpReqType> *req,
                                   std::vector<TBlob> *input_blobs,
                                   std::vector<TBlob> *output_blobs,
                                   std::vector<NDArray> *pre_temp_src,
                                   std::vector<NDArray> *pre_temp_dst,
                                   std::vector<NDArray> *post_temp_src,
                                   std::vector<NDArray> *post_temp_dst,
                                   std::unordered_map<uint32_t, uint32_t> *in_temp_idx_map,
                                   const std::vector<uint32_t> &mutate_idx) {
  // populate input blobs
  SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst,
                      in_temp_idx_map);
  // populate output blobs; note the deliberate src/dst swap: the temporaries
  // become the post-compute cast sources and the original outputs the
  // cast destinations
  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
                       post_temp_src);
  // add mutable inputs to post temp list
  for (const auto idx : mutate_idx) {
    auto map_iter = in_temp_idx_map->find(idx);
    if (map_iter != in_temp_idx_map->end()) {
      post_temp_src->push_back(pre_temp_dst->at(map_iter->second));
      post_temp_dst->push_back(ndinputs[idx]);
    }
  }
}

/*
 * \brief Cast the NDArrays in `src` and store the results in the NDArrays in `dst`.
 *        This is only used for storage fallback in the executor.
 * \param src list of source NDArrays to cast
 * \param dst list of destination NDArrays which hold the results of the cast_storage operation
 * \param ctx operator context for the cast_storage operation
 * \param is_gpu whether to perform the cast on GPU
 */
inline void CastNonDefaultStorage(const std::vector<NDArray>& src,
                                  const std::vector<NDArray>& dst,
                                  const OpContext& ctx,
                                  const bool is_gpu) {
  CHECK_EQ(dst.size(), src.size());
  for (size_t i = 0; i < src.size(); i++) {
    if (is_gpu) {
#if MXNET_USE_CUDA
      CastStorageDispatch<gpu>(ctx, src[i], dst[i]);
#else
      LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
    } else {
      CastStorageDispatch<cpu>(ctx, src[i], dst[i]);
    }
  }
}
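
// Illustrative sketch (not part of this header): the full fallback flow that
// pairs SetupDefaultBlobsInOut with CastNonDefaultStorage. The surrounding
// variables and the fcompute callable are hypothetical.
//
//   SetupDefaultBlobsInOut(ndinputs, ndoutputs, nullptr, nullptr, &req,
//                          &in_blobs, &out_blobs, &pre_src, &pre_dst,
//                          &post_src, &post_dst, &in_idx_map, mutate_idx);
//   CastNonDefaultStorage(pre_src, pre_dst, op_ctx, is_gpu);    // densify inputs
//   fcompute(attrs, op_ctx, in_blobs, req, out_blobs);          // dense kernel
//   CastNonDefaultStorage(post_src, post_dst, op_ctx, is_gpu);  // copy results back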

/*!
 * \brief The default type inference function, which assigns all undefined
 *        types to the same type as one of the defined (non -1) input or
 *        output types.
 */
inline bool SameType(const nnvm::NodeAttrs& attrs,
                     std::vector<int> *iattr,
                     std::vector<int> *oattr) {
  int def_v = -1;
  for (int v : *oattr) {
    if (v != -1) {
      def_v = v; break;
    }
  }
  if (def_v == -1) {
    for (int v : *iattr) {
      if (v != -1) {
        def_v = v; break;
      }
    }
  }
  if (def_v == -1) return false;
  for (int& v : *oattr) {
    v = def_v;
  }
  for (int& v : *iattr) {
    v = def_v;
  }
  return true;
}
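
// Worked example (hypothetical values): with *iattr == {-1, mshadow::kFloat32}
// and *oattr == {-1}, the first defined type found is kFloat32, so both vectors
// are rewritten to all-kFloat32 and the function returns true. If every entry
// is -1, it returns false and leaves both vectors unchanged.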

/*!
 * \brief The default storage type inference function, which assigns all
 *        undefined storage types to kDefaultStorage. If all input and output
 *        storage types are kDefaultStorage, DispatchMode::kFCompute is
 *        assigned to dispatch_mode; otherwise DispatchMode::kFComputeFallback
 *        is assigned.
 */
inline bool DefaultStorageType(const nnvm::NodeAttrs& attrs,
                               const int dev_mask,
                               DispatchMode* dispatch_mode,
                               std::vector<int> *iattr,
                               std::vector<int> *oattr) {
  bool fallback = false;
  for (int& v : *oattr) {
    if (v == -1) v = kDefaultStorage;
    if (v != kDefaultStorage) fallback = true;
  }
  for (int& v : *iattr) {
    if (v == -1) v = kDefaultStorage;
    if (v != kDefaultStorage) fallback = true;
  }
  if (*dispatch_mode == DispatchMode::kUndefined) {
    if (fallback) {
      *dispatch_mode = DispatchMode::kFComputeFallback;
    } else {
      *dispatch_mode = DispatchMode::kFCompute;
    }
  }
  return true;
}
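
// Worked example (hypothetical values): with *iattr == {kRowSparseStorage, -1},
// the -1 becomes kDefaultStorage, and because one input remains non-default,
// *dispatch_mode is set to DispatchMode::kFComputeFallback, i.e. the executor
// densifies that input (see the fallback helpers above) before invoking the
// dense kernel.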

// string representation of storage id
inline std::string storage_str(int storage_id) {
  std::string str;
  if (storage_id == -1) {
    str = "var (-1)";
  } else if (storage_id == -2) {
    str = "external storage (-2)";
  } else {
    str = "group " + std::to_string(storage_id);
  }
  return str;
}

/* log the static memory plan of the graph. Example:
   node 0 var
   node 1 _copy
                input 0: [80,3,224,224] (47040 KB) -> var storage (-1)
                output 1: [80,3,224,224] (47040 KB) -> group 0
   node 2 var
   node 3 var
   node 4 var
   node 5 var
   node 6 BatchNorm
                input 1: [80,3,224,224] (47040 KB) -> group 0
                input 2: [3] (0 KB) -> var storage (-1)
                input 3: [3] (0 KB) -> var storage (-1)
                input 4: [3] (0 KB) -> var storage (-1)
                input 5: [3] (0 KB) -> var storage (-1)
                output 6: [80,3,224,224] (47040 KB) -> group 1
                output 7: [3] (0 KB) -> group 3
                output 8: [3] (0 KB) -> group 2
   ...
 */
inline void LogMemoryPlan(const nnvm::Graph& g) {
  const auto &idx = g.indexed_graph();
  const auto& vshape = g.GetAttr<mxnet::ShapeVector>("shape");
  const auto& vtype = g.GetAttr<nnvm::DTypeVector>("dtype");
  // find node range
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start = range.first;
    node_end = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name;
      for (const auto& e : inode.inputs) {
        auto eid = idx.entry_id(e);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " ("
                  << kilo_bytes << " KB)";
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid = idx.entry_id(nid, index);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " ("
                  << kilo_bytes << " KB)";
      }
    }
  }
}

/* log the inferred storage types and dispatch modes of the graph. Example:
   node 0 var
   node 1 _copy: fcompute
                input 0: default
                output 1: default
   node 2 var
   node 3 Convolution: fcompute
                input 1: default
                input 2: default
                output 3: default
   node 4 var
   node 5 var
   node 6 var
   node 7 var
   node 8 BatchNorm: fcompute
                input 3: default
                input 4: default
                input 5: default
                input 6: default
                input 7: default
                output 8: default
                output 9: default
                output 10: default
   ...
 */
inline void LogInferStorage(const nnvm::Graph& g) {
  const auto &idx = g.indexed_graph();
  const auto& vstorage_type = g.GetAttr<StorageTypeVector>("storage_type");
  const auto& dispatch_modes = g.GetAttr<DispatchModeVector>("dispatch_mode");
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start = range.first;
    node_end = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name
                << ": " << dispatch_mode_string(dispatch_modes[nid]);
      for (const auto& e : inode.inputs) {
        auto eid = idx.entry_id(e);
        LOG(INFO) << "\t\tinput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid = idx.entry_id(nid, index);
        LOG(INFO) << "\t\toutput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
    }
  }
}

// prints a helpful message after shape inference errors in executor.
inline void HandleInferShapeError(const size_t num_forward_inputs,
                                  const nnvm::IndexedGraph& idx,
                                  const mxnet::ShapeVector& inferred_shapes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const mxnet::TShape& inferred_shape = inferred_shapes[eid];
    if (!shape_is_known(inferred_shape)) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << inferred_shape << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferShape pass cannot decide shapes for the following arguments "
                "(-1 means unknown dimensions). Please consider providing them as inputs:\n"
             << oss.str();
}

// prints a helpful message after type inference errors in executor.
inline void HandleInferTypeError(const size_t num_forward_inputs,
                                 const nnvm::IndexedGraph& idx,
                                 const nnvm::DTypeVector& inferred_dtypes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const int inferred_dtype = inferred_dtypes[eid];
    if (inferred_dtype == -1) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << inferred_dtype << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferType pass cannot decide dtypes for the following arguments "
                "(-1 means unknown dtype). Please consider providing them as inputs:\n"
             << oss.str();
}

// prints a helpful message after storage type checking errors in executor.
inline void HandleInferStorageTypeError(const size_t num_forward_inputs,
                                        const nnvm::IndexedGraph& idx,
                                        const StorageTypeVector& inferred_stypes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const int inferred_stype = inferred_stypes[eid];
    if (inferred_stype == -1) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << common::stype_string(inferred_stype) << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferStorageType pass cannot decide storage type for the following arguments "
                "(-1 means unknown stype). Please consider providing them as inputs:\n"
             << oss.str();
}

/*!
 * \brief If the requested ndarray's shape size does not exceed the
 *        corresponding shared_buffer entry's shape size and the storage type
 *        is shareable, reuse the memory allocation in shared_buffer;
 *        otherwise, create a zero ndarray. Shareable storage types are
 *        default storage and, when enable_row_sparse_sharing is true,
 *        row_sparse storage.
 */
inline NDArray ReshapeOrCreate(const std::string& name,
                               const mxnet::TShape& dest_arg_shape,
                               const int dest_arg_dtype,
                               const NDArrayStorageType dest_arg_stype,
                               const Context& ctx,
                               std::unordered_map<std::string, NDArray>* shared_buffer,
                               bool enable_row_sparse_sharing) {
  bool stype_shareable = dest_arg_stype == kDefaultStorage;
  if (enable_row_sparse_sharing) {
    stype_shareable = stype_shareable || dest_arg_stype == kRowSparseStorage;
  }
  auto it = shared_buffer->find(name);
  if (it != shared_buffer->end()) {
    // check if size is large enough for sharing
    bool size_shareable = it->second.shape().Size() >= dest_arg_shape.Size();
    if (size_shareable && stype_shareable) {  // memory can be reused
      CHECK_EQ(it->second.dtype(), dest_arg_dtype)
          << "Requested arg array's dtype does not match that of the reusable ndarray";
      CHECK_EQ(it->second.storage_type(), dest_arg_stype)
          << "Requested arg array's stype does not match that of the reusable ndarray";
      return it->second.Reshape(dest_arg_shape);
    } else if (stype_shareable) {
      LOG(WARNING) << "Bucketing: data " << name << " has a shape " << dest_arg_shape
                   << ", which is larger than already allocated shape " << it->second.shape()
                   << ". Need to re-allocate. Consider putting default bucket key to be "
                   << "the bucket taking the largest input for better memory sharing.";
      // size is not large enough, creating a larger one for sharing
      // the NDArrays in shared_buffer are guaranteed to be of shareable storages
      it->second = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
      return it->second;
    } else {
      // not shareable storage
      return InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
    }
  } else {
    auto ret = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
    if (stype_shareable) {
      shared_buffer->emplace(name, ret);
    }
    return ret;
  }  // if (it != shared_buffer->end())
}
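
// Illustrative sketch (not part of this header): reusing one buffer across
// two bucket shapes. All names and shapes below are hypothetical.
//
//   std::unordered_map<std::string, NDArray> shared_buffer;
//   const Context ctx = Context::CPU();
//   // First call allocates a zero NDArray and records it under "data".
//   NDArray a = ReshapeOrCreate("data", mxnet::TShape({64, 128}), mshadow::kFloat32,
//                               kDefaultStorage, ctx, &shared_buffer, false);
//   // Second call requests fewer elements, so the same chunk is reshaped.
//   NDArray b = ReshapeOrCreate("data", mxnet::TShape({32, 128}), mshadow::kFloat32,
//                               kDefaultStorage, ctx, &shared_buffer, false);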

/*!
 * \brief Assign context to the graph.
 *        This is triggered by both simple_bind and bind flows.
 */
inline nnvm::Graph AssignContext(nnvm::Graph g,
                                 const Context& default_ctx,
                                 const std::map<std::string, Context>& ctx_map,
                                 const std::vector<Context>& in_arg_ctxes,
                                 const std::vector<Context>& arg_grad_ctxes,
                                 const std::vector<Context>& aux_state_ctxes,
                                 const std::vector<OpReqType>& grad_req_types,
                                 size_t num_forward_inputs,
                                 size_t num_forward_outputs) {
  const auto& idx = g.indexed_graph();
  const auto& mutable_nodes = idx.mutable_input_nodes();
  // if no context mapping is given, assign the default context to every node
  if (ctx_map.size() == 0) {
    g.attrs["context"] = std::make_shared<nnvm::any>(
        exec::ContextVector(idx.num_nodes(), default_ctx));
    for (const auto& x : in_arg_ctxes) {
      CHECK(x == default_ctx)
          << "Input array is in " << x << " while binding with ctx=" << default_ctx
          << ". All arguments must be in global context (" << default_ctx
          << ") unless group2ctx is specified for cross-device graph.";
    }
    for (const auto& x : arg_grad_ctxes) {
      CHECK(x == default_ctx)
          << "Gradient array is in " << x << " while binding with ctx="
          << default_ctx << ". All gradients must be in global context (" << default_ctx
          << ") unless group2ctx is specified for cross-device graph.";
    }
    return g;
  }

  // otherwise, use context assignment.
  std::map<Context, int> ctx2id;  // map ctx to device id
  std::vector<Context> ctx_list;  // index is device id
  nnvm::DeviceVector device(idx.num_nodes(), -1);  // index is node id
  nnvm::DeviceAssignMap device_map;  // map arg name to device id

  // loop through the user input ctx_map and
  // populate maps and lists
  for (auto &kv : ctx_map) {
    if (ctx2id.count(kv.second) == 0) {  // if context has no device id, create one
      ctx2id[kv.second] = static_cast<int>(ctx_list.size());  // assign device id to ctx
      ctx_list.push_back(kv.second);  // save ctx to the list
    }
    // assign device id to the arg name with the corresponding ctx
    device_map[kv.first] = ctx2id.at(kv.second);
  }

  // loop through all the rest of input nodes not specified
  // in the ctx_map and populate maps and lists
  size_t arg_top = 0, aux_top = 0;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    Context ctx;
    if (mutable_nodes.count(nid)) {  // aux node is mutable
      CHECK_LT(aux_top, aux_state_ctxes.size());
      ctx = aux_state_ctxes[aux_top];
      ++aux_top;
    } else {  // regular input node is immutable
      CHECK_LT(arg_top, in_arg_ctxes.size());
      ctx = in_arg_ctxes[arg_top];
      ++arg_top;
    }
    if (ctx2id.count(ctx) == 0) {  // if the current ctx is not in the map of ctx and device id
      ctx2id[ctx] = static_cast<int>(ctx_list.size());  // assign the current ctx with device id
      ctx_list.push_back(ctx);  // save the current ctx in the list
    }
    device[nid] = ctx2id.at(ctx);  // assign device id to the current node
  }

  // loop through backward input nodes and populate maps and lists
  // the backward input nodes are the gradients of the loss wrt the outputs
  size_t arg_grad_offset = 0;
  // keep an offset into the arg_grad_ctxes vector,
  // since g.outputs excludes arg_grads whose req == null
  CHECK_GE(grad_req_types.size(), g.outputs.size() - num_forward_outputs)
      << "insufficient number of grad_reqs";
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
    const uint32_t nid = idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[arg_grad_offset];
    if (ctx2id.count(ctx) == 0) {
      ctx2id[ctx] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(ctx);
    }
    int devid = ctx2id.at(ctx);
    if (device[nid] != -1) {
      CHECK_EQ(device[nid], devid) << "device of same output not equal to each other";
    } else {
      device[nid] = devid;
    }
  }

  g.attrs["device"] = std::make_shared<dmlc::any>(std::move(device));
  g = nnvm::pass::PlaceDevice(g, "__ctx_group__", device_map, "_CrossDeviceCopy");
  const auto& assigned_devices = g.GetAttr<nnvm::DeviceVector>("device");

  exec::ContextVector vcontext;
  for (auto context : assigned_devices) {
    if (context == -1) {
      vcontext.push_back(default_ctx);
    } else {
      vcontext.push_back(ctx_list[context]);
    }
  }

  // after device planning, we should check again
  // whether the assigned device of each gradient node
  // corresponds to the storage of the grads
  auto &new_idx = g.indexed_graph();
  arg_grad_offset = 0;
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
    const uint32_t nid = new_idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[arg_grad_offset];
    CHECK(ctx == vcontext[nid])
        << "Trying to save gradient to " << ctx
        << " while its source node \"" << new_idx[nid].source->attrs.name
        << "\" computes it on " << vcontext[nid]
        << ". Check your ctx in NDArray allocation.";
  }

  g.attrs["context"] = std::make_shared<nnvm::any>(std::move(vcontext));
  return g;
}
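
// Illustrative sketch (not part of this header): binding a graph across two
// devices with a group2ctx-style mapping. All names below are hypothetical.
//
//   std::map<std::string, Context> ctx_map;
//   ctx_map["dev1"] = Context::GPU(0);  // nodes whose __ctx_group__ attr is "dev1"
//   ctx_map["dev2"] = Context::GPU(1);  // nodes whose __ctx_group__ attr is "dev2"
//   g = AssignContext(std::move(g), Context::GPU(0), ctx_map,
//                     in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes,
//                     grad_req_types, num_forward_inputs, num_forward_outputs);
//   // g.attrs["context"] now holds a per-node exec::ContextVector, with
//   // _CrossDeviceCopy nodes inserted where entries cross devices.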

/*!
 * \brief Copy the graph, optionally leaving original Variable nodes.
 *
 * \param dst destination graph
 * \param src source graph being copied
 * \param copy_variables whether to copy the Variable nodes or reuse the
 *        originals from the source graph
 */
void CopyGraph(nnvm::Graph *dst, const nnvm::Graph &src, bool copy_variables);

/*!
 * \brief Check whether the graph contains any duplicated names in its inputs.
 *
 * \param idx Indexed graph being checked
 */
bool CheckForInputNameDuplicates(const nnvm::IndexedGraph& idx);

}  // namespace common
}  // namespace mxnet
#endif  // MXNET_COMMON_EXEC_UTILS_H_