mxnet
ndarray.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef MXNET_NDARRAY_H_
25 #define MXNET_NDARRAY_H_
26 
27 #include <dmlc/base.h>
28 #include <dmlc/logging.h>
29 #include <dmlc/io.h>
30 #include <dmlc/type_traits.h>
31 #include <dmlc/registry.h>
32 #include <nnvm/node.h>
33 #include <vector>
34 #include <map>
35 #include <string>
36 #include <memory>
37 #include "./base.h"
38 #include "./storage.h"
39 #include "./engine.h"
40 #if MKL_EXPERIMENTAL == 1
41 #include <mkl_memory.h>
42 #endif
43 // check c++11
44 #if DMLC_USE_CXX11 == 0
45 #error "cxx11 was required for ndarray module"
46 #endif
47 
48 namespace mxnet {
49 
50 // forward declaration
51 namespace autograd {
52 class AGNode;
53 
54 using AGNodePtr = std::shared_ptr<AGNode>;
55 
56 class AGNodeEntry {
57  public:
59  uint32_t index;
60  uint32_t version;
61 
62  void clear() {
63  ag_node.reset();
64  index = version = 0;
65  }
66 
67  nnvm::NodeEntry nn_entry() const;
68  bool is_none() const;
69 };
70 
71 class AutogradRuntime;
72 } // namespace autograd
73 
77 class NDArray {
78  public:
80  NDArray() {
81 #if MKL_EXPERIMENTAL == 1
82  Mkl_mem_ = MKLMemHolder::create();
83 #endif
84  }
92  NDArray(const TShape &shape, Context ctx,
93  bool delay_alloc = false, int dtype = mshadow::default_type_flag)
94  : ptr_(std::make_shared<Chunk>(shape.Size(), ctx, delay_alloc, dtype)),
95  shape_(shape), dtype_(dtype), entry_({nullptr, 0, 0}) {
96 #if MKL_EXPERIMENTAL == 1
97  Mkl_mem_ = std::make_shared<MKLMemHolder>();
98 #endif
99  }
107  NDArray(const TBlob &data, int dev_id)
108  : ptr_(std::make_shared<Chunk>(data, dev_id)), shape_(data.shape_),
109  dtype_(data.type_flag_), entry_({nullptr, 0, 0}) {
110 #if MKL_EXPERIMENTAL == 1
111  Mkl_mem_ = std::make_shared<MKLMemHolder>();
112 #endif
113  }
117  inline const TShape& shape() const {
118  return shape_;
119  }
123  inline const TBlob& data() const {
124  CheckAndAlloc();
125  SetTBlob();
126  return tblob_;
127  }
131  NDArray grad() const;
135  inline Context ctx() const {
136  return ptr_->shandle.ctx;
137  }
141  inline int dtype() const {
142  return dtype_;
143  }
145  inline bool is_none() const {
146  return ptr_.get() == nullptr;
147  }
149  bool fresh_out_grad() const;
151  void set_fresh_out_grad(bool state) const;
156  inline void WaitToRead() const {
157  if (is_none()) return;
158  Engine::Get()->WaitForVar(ptr_->var);
159  }
164  inline void WaitToWrite() const {
165  if (is_none()) return;
170  Engine::Get()->PushSync([](RunContext) {}, Context{}, {}, {ptr_->var});
171  Engine::Get()->WaitForVar(ptr_->var);
172  }
174  inline Engine::VarHandle var() const {
175  return ptr_->var;
176  }
181  void Save(dmlc::Stream *strm) const;
187  bool Load(dmlc::Stream *strm);
193  NDArray &operator=(real_t scalar);
200  NDArray &operator+=(const NDArray &src);
207  NDArray &operator+=(const real_t &src);
214  NDArray &operator-=(const NDArray &src);
221  NDArray &operator-=(const real_t &src);
228  NDArray &operator*=(const NDArray &src);
235  NDArray &operator*=(const real_t &src);
242  NDArray &operator/=(const NDArray &src);
249  NDArray &operator/=(const real_t &src);
254  NDArray T() const;
260  NDArray Copy(Context ctx) const;
271  void SyncCopyFromCPU(const void *data, size_t size) const;
282  void SyncCopyToCPU(void *data, size_t size) const;
289  NDArray Slice(index_t begin, index_t end) const;
295  NDArray At(index_t idx) const;
303  inline NDArray AsArray(const TShape &shape, int dtype) const {
304  CHECK_GE(shape_.Size() * mshadow::mshadow_sizeof(dtype_),
305  shape.Size() * mshadow::mshadow_sizeof(dtype))
306  << "NDArray.AsArray: target memory size is bigger";
307 #if MKL_EXPERIMENTAL == 1
308  if (Mkl_mem_ != nullptr) {
309  // convert prv to cpu
310  Mkl_mem_->check_and_prv_to_cpu(ptr_->shandle.dptr);
311  }
312 #endif
313  NDArray ret = *this;
314  ret.shape_ = shape;
315  ret.dtype_ = dtype;
316  return ret;
317  }
323  NDArray Reshape(const TShape &shape) const;
327  NDArray Detach() const {
328  NDArray ret(*this);
329  ret.entry_ = autograd::AGNodeEntry{nullptr, 0, 0};
330  return ret;
331  }
332 
333  nnvm::Symbol get_autograd_symbol() {
334  CHECK(!entry_.is_none())
335  << "NDArray is not part of a computation graph. Did you forget to turn on recording?";
336  nnvm::Symbol ret;
337  ret.outputs.emplace_back(entry_.nn_entry());
338  return ret;
339  }
344  inline void CheckAndAlloc() const {
345  ptr_->CheckAndAlloc();
346  }
353  static void Save(dmlc::Stream* fo,
354  const std::vector<NDArray>& data,
355  const std::vector<std::string>& names);
362  static void Load(dmlc::Stream* fi,
363  std::vector<NDArray>* data,
364  std::vector<std::string>* keys);
365 
366  private:
367  friend class autograd::AutogradRuntime;
369  struct Chunk {
370  /*! \brief storage handle from storage engine */
371  Storage::Handle shandle;
372  /*! \brief variable from engine */
373  Engine::VarHandle var;
374  /*!
375  * \brief if this is true, this means the data do not come
376  * from Storage, and do not need to be freed
377  */
378  bool static_data;
379  /*! \brief whether data allocation is delayed */
380  bool delay_alloc;
381  /*! \brief default constructor: static data, no delayed allocation */
382  Chunk() : static_data(true), delay_alloc(false) {
383  var = Engine::Get()->NewVariable();
384  }
385  /*! \brief construct from static (externally owned) data */
386  Chunk(const TBlob &data, int dev_id)
387  : static_data(true),
388  delay_alloc(false) {
389  var = Engine::Get()->NewVariable();
390  if (data.dev_mask() == cpu::kDevMask) {
391  shandle.ctx = Context::CPU();
392  } else {
393  CHECK_EQ(data.dev_mask(), gpu::kDevMask);
394  shandle.ctx = Context::GPU(dev_id);
395  }
396  shandle.dptr = data.dptr_;
397  shandle.size = data.shape_.Size() * mshadow::mshadow_sizeof(data.type_flag_);
398  }
399  /*! \brief construct a new chunk, optionally delaying the allocation */
400  Chunk(uint64_t size, Context ctx, bool delay_alloc_, int dtype)
401  : static_data(false), delay_alloc(true) {
402  var = Engine::Get()->NewVariable();
403  shandle.size = size * mshadow::mshadow_sizeof(dtype);
404  shandle.ctx = ctx;
405  if (!delay_alloc_) this->CheckAndAlloc();
406  }
407  /*! \brief check if delay alloc is on, do alloc if not yet done */
408  inline void CheckAndAlloc(void) {
409  if (delay_alloc) {
410  shandle = Storage::Get()->Alloc(shandle.size, shandle.ctx);
411  delay_alloc = false;
412  }
413  }
414  /*! \brief destructor: schedule variable deletion; free storage only if we own it */
415  ~Chunk() {
416  if (static_data || delay_alloc) {
417  Engine::Get()->DeleteVariable([](RunContext s) {}, shandle.ctx, var);
418  } else {
419  Storage::Handle h = this->shandle;
420  Engine::Get()->DeleteVariable([h](RunContext s) {
421  Storage::Get()->Free(h);
422  }, shandle.ctx, var);
423  }
424  }
425  };
426 
427  void SetTBlob() const {
428  tblob_.dptr_ = static_cast<char*>(ptr_->shandle.dptr) + byte_offset_;
429  tblob_.shape_ = shape_;
430  tblob_.type_flag_ = dtype_;
431  tblob_.SetDLTensor(ptr_->shandle.ctx.dev_mask(), ptr_->shandle.ctx.dev_id);
432 #if MKL_EXPERIMENTAL == 1
433  tblob_.Mkl_mem_ = Mkl_mem_;
434 #endif
435  }
436 
437 #if MKL_EXPERIMENTAL == 1
438  std::shared_ptr<MKLMemHolder> Mkl_mem_;
439 #endif
440 
441  std::shared_ptr<Chunk> ptr_;
443  TShape shape_;
445  size_t byte_offset_ = 0;
447  int dtype_ = -1;
449  autograd::AGNodeEntry entry_;
457  mutable TBlob tblob_;
458 };
459 
471 void CopyFromTo(const NDArray &from, NDArray *to, int priority = 0);
472 
473 
480 void ElementwiseSum(const std::vector<NDArray> &source, NDArray *out, int priority = 0);
481 
488 NDArray operator+(const NDArray &lhs, const NDArray &rhs);
495 NDArray operator+(const NDArray &lhs, const real_t &rhs);
502 NDArray operator-(const NDArray &lhs, const NDArray &rhs);
509 NDArray operator-(const NDArray &lhs, const real_t &rhs);
516 NDArray operator*(const NDArray &lhs, const NDArray &rhs);
523 NDArray operator*(const NDArray &lhs, const real_t &rhs);
530 NDArray operator/(const NDArray &lhs, const NDArray &rhs);
537 NDArray operator/(const NDArray &lhs, const real_t &rhs);
538 
543 void RandomSeed(uint32_t seed);
550 void SampleUniform(real_t begin, real_t end, NDArray *out);
557 void SampleGaussian(real_t mu, real_t sigma, NDArray *out);
564 void SampleGamma(real_t alpha, real_t beta, NDArray *out);
570 void SampleExponential(real_t lambda, NDArray *out);
576 void SamplePoisson(real_t lambda, NDArray *out);
583 void SampleNegBinomial(int32_t k, real_t p, NDArray *out);
590 void SampleGenNegBinomial(real_t mu, real_t alpha, NDArray *out);
591 
592 
593 //--------------------------------------------------------------
594 // The following part are API Registration of NDArray functions.
595 //--------------------------------------------------------------
596 
598 typedef std::function<void (NDArray **used_vars,
599  real_t *scalars,
600  NDArray **mutate_vars,
601  int num_params,
602  char **param_keys,
603  char **param_vals)> NDArrayAPIFunction;
619 };
622  : public dmlc::FunctionRegEntryBase<NDArrayFunctionReg,
623  NDArrayAPIFunction> {
625  unsigned num_use_vars;
627  unsigned num_mutate_vars;
629  unsigned num_scalars;
636  : num_use_vars(0),
637  num_mutate_vars(0),
638  num_scalars(0),
639  type_mask(0) {}
646  inline NDArrayFunctionReg &set_function(void (*fsetvalue)(const real_t &rhs,
647  NDArray *out)) {
648  body = [fsetvalue] (NDArray **used_vars, real_t *s, NDArray **mutate_vars,
649  int num_params, char **param_keys, char **param_vals) {
650  (*fsetvalue)(s[0], mutate_vars[0]);
651  };
652  num_mutate_vars = 1; num_scalars = 1;
653  this->add_argument("src", "real_t", "Source input to the function.");
654  return *this;
655  }
662  inline NDArrayFunctionReg &set_function(void(*fternary)(const NDArray &lhs,
663  const NDArray &mhs,
664  const NDArray &rhs,
665  NDArray *out)) {
666  body = [fternary](NDArray **used_vars,
667  real_t *s, NDArray **mutate_vars,
668  int num_params, char **param_keys, char **param_vals) {
669  (*fternary)(*used_vars[0], *used_vars[1], *used_vars[2], mutate_vars[0]);
670  };
671  num_use_vars = 3; num_mutate_vars = 1;
673  this->add_argument("lhs", "NDArray", "Left operand to the function.");
674  this->add_argument("mhs", "NDArray", "Middle operand to the function.");
675  this->add_argument("rhs", "NDArray", "Right operand to the function.");
676  return *this;
677  }
684  inline NDArrayFunctionReg &set_function(void (*fbinary)(const NDArray &lhs,
685  const NDArray &rhs,
686  NDArray *out)) {
687  body = [fbinary] (NDArray **used_vars, real_t *s, NDArray **mutate_vars,
688  int num_params, char **param_keys, char **param_vals) {
689  (*fbinary)(*used_vars[0], *used_vars[1], mutate_vars[0]);
690  };
691  num_use_vars = 2; num_mutate_vars = 1;
693  this->add_argument("lhs", "NDArray", "Left operand to the function.");
694  this->add_argument("rhs", "NDArray", "Right operand to the function.");
695  return *this;
696  }
703  inline NDArrayFunctionReg &set_function(void (*fscalar)(const NDArray &lhs,
704  const real_t &rhs,
705  NDArray *out)) {
706  body = [fscalar] (NDArray **used_vars, real_t *s, NDArray **mutate_vars,
707  int num_params, char **param_keys, char **param_vals) {
708  (*fscalar)(*used_vars[0], s[0], mutate_vars[0]);
709  };
710  num_use_vars = 1; num_mutate_vars = 1; num_scalars = 1;
712  this->add_argument("lhs", "NDArray", "Left operand to the function.");
713  this->add_argument("rhs", "real_t", "Right operand to the function.");
714  return *this;
715  }
722  inline NDArrayFunctionReg &set_function(void (*funary)(const NDArray &src,
723  NDArray *out)) {
724  body = [funary] (NDArray **used_vars, real_t *s, NDArray **mutate_vars,
725  int num_params, char **param_keys, char **param_vals) {
726  (*funary)(*used_vars[0], mutate_vars[0]);
727  };
728  num_use_vars = 1; num_mutate_vars = 1;
730  this->add_argument("src", "NDArray", "Source input to the function.");
731  return *this;
732  }
740  void (*fgeneric)(NDArray **used_vars,
741  real_t *s,
742  NDArray **mutate_vars,
743  const std::map<std::string, std::string>& param)) {
744  body = [fgeneric] (NDArray **used_vars, real_t *s, NDArray **mutate_vars,
745  int num_params, char **param_keys, char **param_vals) {
746  std::map<std::string, std::string> param;
747  for (int i = 0; i < num_params; ++i) {
748  param[param_keys[i]] = param_vals[i];
749  }
750  fgeneric(used_vars, s, mutate_vars, param);
751  };
752  return *this;
753  }
759  inline NDArrayFunctionReg &set_num_use_vars(unsigned n) {
760  num_use_vars = n; return *this;
761  }
768  num_mutate_vars = n; return *this;
769  }
775  inline NDArrayFunctionReg &set_num_scalars(unsigned n) {
776  num_scalars = n; return *this;
777  }
783  inline NDArrayFunctionReg &set_type_mask(int tmask) {
784  type_mask = tmask; return *this;
785  }
786 }; // NDArrayFunctionReg
787 
799 #define MXNET_REGISTER_NDARRAY_FUN(name) \
800  DMLC_REGISTRY_REGISTER(::mxnet::NDArrayFunctionReg, NDArrayFunctionReg, name)
801 
802 } // namespace mxnet
803 
804 namespace dmlc {
806 DMLC_DECLARE_TRAITS(has_saveload, mxnet::NDArray, true);
807 } // namespace dmlc
808 #endif // MXNET_NDARRAY_H_
NDArrayFunctionReg & set_num_mutate_vars(unsigned n)
set the number of mutate variables
Definition: ndarray.h:767
Engine::VarHandle var() const
Definition: ndarray.h:174
void RandomSeed(uint32_t seed)
Seed the random number generator.
Engine that schedules all the operations according to dependency.
TShape shape_
shape of the tensor
Definition: tensor_blob.h:64
void clear()
Definition: ndarray.h:62
NDArrayFunctionReg()
constructor
Definition: ndarray.h:635
namespace of mxnet
Definition: base.h:126
NDArray operator*(const NDArray &lhs, const NDArray &rhs)
elementwise multiplication
virtual void Free(Handle handle)=0
Free storage.
NDArrayFunctionReg & set_num_use_vars(unsigned n)
set the number of use variables
Definition: ndarray.h:759
DMLC_DECLARE_TRAITS(has_saveload, mxnet::NDArray, true)
traits
mshadow::default_real_t real_t
data type that will be used to store ndarray
Definition: base.h:134
static Context GPU(int32_t dev_id=-1)
int type_mask
information on how function should be called from API
Definition: ndarray.h:631
NDArrayFunctionReg & set_function(void(*funary)(const NDArray &src, NDArray *out))
set the function body to a unary NDArray function this will also auto set the parameters correctly ...
Definition: ndarray.h:722
NDArray Detach() const
Return a copy of this NDArray without autograd history.
Definition: ndarray.h:327
int type_flag_
type flag of the tensor blob
Definition: tensor_blob.h:66
NDArrayFunctionReg & set_num_scalars(unsigned n)
set the number of scalar arguments
Definition: ndarray.h:775
nnvm::TShape TShape
Shape data structure used to record shape information.
Definition: base.h:136
unsigned num_mutate_vars
number of variable mutated by this function
Definition: ndarray.h:627
execution time context. The information needed in runtime for actual execution.
Definition: base.h:238
void * dptr
Pointer to the data.
Definition: storage.h:44
nnvm::NodeEntry nn_entry() const
NDArrayFunctionReg & set_function(void(*fscalar)(const NDArray &lhs, const real_t &rhs, NDArray *out))
set the function body to a binary NDArray function this will also auto set the parameters correctly ...
Definition: ndarray.h:703
Context ctx
Context information about device and ID.
Definition: storage.h:52
NDArray()
default constructor
Definition: ndarray.h:80
unsigned num_use_vars
number of variable used by this function
Definition: ndarray.h:625
NDArrayFunctionReg & set_function(void(*fternary)(const NDArray &lhs, const NDArray &mhs, const NDArray &rhs, NDArray *out))
set the function body to a ternary NDArray function this will also auto set the parameters correctly ...
Definition: ndarray.h:662
virtual Handle Alloc(size_t size, Context ctx)=0
Allocate a new contiguous memory for a given size.
Definition: ndarray.h:56
bool is_none() const
Definition: ndarray.h:145
all the scalar should go before use_vars
Definition: ndarray.h:609
void SampleExponential(real_t lambda, NDArray *out)
Sample exponential distribution for each elements of out.
void * dptr_
pointer to the data
Definition: tensor_blob.h:62
virtual VarHandle NewVariable()=0
Allocate a new variable, the variable can then be used to schedule the operation concurrently via dep...
whether this function allows the handles in the target to be empty NDArray that are not yet initializ...
Definition: ndarray.h:618
static Storage * Get()
const TShape & shape() const
Definition: ndarray.h:117
Definition: ndarray.h:804
std::shared_ptr< AGNode > AGNodePtr
Definition: ndarray.h:54
virtual void WaitForVar(VarHandle var)=0
Wait for a variable.
Context ctx() const
Definition: ndarray.h:135
void SampleGaussian(real_t mu, real_t sigma, NDArray *out)
Sample gaussian distribution for each elements of out.
Storage manager across multiple devices.
void WaitToRead() const
Block until all the pending write operations with respect to current NDArray are finished, and read can be performed.
Definition: ndarray.h:156
nnvm::Symbol get_autograd_symbol()
Definition: ndarray.h:333
int dtype() const
Definition: ndarray.h:141
Storage handle.
Definition: storage.h:40
virtual void DeleteVariable(SyncFn delete_fn, Context exec_ctx, VarHandle var)=0
Schedule the deletion of a variable.
NDArrayFunctionReg & set_type_mask(int tmask)
set type mask
Definition: ndarray.h:783
engine::VarHandle VarHandle
Variable pointer.
Definition: engine.h:102
void WaitToWrite() const
Block until all the pending read/write operations with respect to current NDArray are finished...
Definition: ndarray.h:164
NDArray operator-(const NDArray &lhs, const NDArray &rhs)
elementwise subtraction
NDArrayFunctionReg & set_function(void(*fsetvalue)(const real_t &rhs, NDArray *out))
set the function body to a NDArray setvalue function this will also auto set the parameters correctly...
Definition: ndarray.h:646
NDArray operator+(const NDArray &lhs, const NDArray &rhs)
elementwise add
void SampleUniform(real_t begin, real_t end, NDArray *out)
Sample uniform distribution for each elements of out.
Registry entry for NDArrayFunction.
Definition: ndarray.h:621
void PushSync(SyncFn exec_fn, Context exec_ctx, std::vector< VarHandle > const &const_vars, std::vector< VarHandle > const &mutable_vars, FnProperty prop=FnProperty::kNormal, int priority=0, const char *opr_name=nullptr)
Push a synchronous operation to the engine.
Definition: engine.h:222
NDArrayFunctionReg & set_function(void(*fbinary)(const NDArray &lhs, const NDArray &rhs, NDArray *out))
set the function body to a binary NDArray function this will also auto set the parameters correctly ...
Definition: ndarray.h:684
static Context CPU(int32_t dev_id=0)
all the use_vars should go before scalar
Definition: ndarray.h:607
NDArray AsArray(const TShape &shape, int dtype) const
Create a NDArray that shares memory with current one The new array must have smaller memory size than...
Definition: ndarray.h:303
unsigned num_scalars
number of scalars used by this function
Definition: ndarray.h:629
static Engine * Get()
const TBlob & data() const
Definition: ndarray.h:123
AGNodePtr ag_node
Definition: ndarray.h:58
void CheckAndAlloc() const
Allocate the space if it is delayed allocated. This is an internal function used by system that norma...
Definition: ndarray.h:344
uint32_t index
Definition: ndarray.h:59
mshadow::index_t index_t
index type usually use unsigned
Definition: base.h:132
size_t size
Size of the storage.
Definition: storage.h:48
uint32_t version
Definition: ndarray.h:60
void SampleGenNegBinomial(real_t mu, real_t alpha, NDArray *out)
Sample generalized negative binomial distribution for each elements of out.
Context information about the execution environment.
Definition: base.h:141
void SamplePoisson(real_t lambda, NDArray *out)
Sample Poisson distribution for each elements of out.
ndarray interface
Definition: ndarray.h:77
void CopyFromTo(const NDArray &from, NDArray *to, int priority=0)
issue an copy operation from one NDArray to another the two ndarray can sit on different devices this...
NDArray(const TBlob &data, int dev_id)
constructing a static NDArray that shares data with TBlob Use with caution: allocate ONLY ONE NDArray...
Definition: ndarray.h:107
int dev_mask() const
device mask of the corresponding device
Definition: tensor_blob.h:227
Symbol Reshape(const std::string &symbol_name, Symbol data, Shape shape=Shape(), bool reverse=false, Shape target_shape=Shape(), bool keep_highest=false)
Definition: op.h:1156
void ElementwiseSum(const std::vector< NDArray > &source, NDArray *out, int priority=0)
Perform elementwise sum over each data from source, store result into out.
std::function< void(NDArray **used_vars, real_t *scalars, NDArray **mutate_vars, int num_params, char **param_keys, char **param_vals)> NDArrayAPIFunction
definition of NDArray function
Definition: ndarray.h:603
void SampleNegBinomial(int32_t k, real_t p, NDArray *out)
Sample negative binomial distribution for each elements of out.
NDArrayFunctionReg & set_function(void(*fgeneric)(NDArray **used_vars, real_t *s, NDArray **mutate_vars, const std::map< std::string, std::string > &param))
set the function body to a generic NDArray function that accepts string name/value parameters ...
Definition: ndarray.h:739
tensor blob class that can be used to hold tensor of any dimension, any device and any data type...
Definition: tensor_blob.h:58
void SampleGamma(real_t alpha, real_t beta, NDArray *out)
Sample gamma distribution for each elements of out.
NDArray(const TShape &shape, Context ctx, bool delay_alloc=false, int dtype=mshadow::default_type_flag)
constructs a new dynamic NDArray
Definition: ndarray.h:92
NDArray operator/(const NDArray &lhs, const NDArray &rhs)
elementwise division
NDArrayFunctionTypeMask
mask information on how functions can be exposed
Definition: ndarray.h:605