tensor.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
31 #ifndef MSHADOW_TENSOR_H_
32 #define MSHADOW_TENSOR_H_
33 #include <string>
34 #include <iostream>
35 #include "./base.h"
36 #include "./expression.h"
37 
38 namespace mshadow {
40 struct cpu {
42  static const bool kDevCPU = true;
44  static const int kDevMask = 1 << 0;
45 };
47 struct gpu {
49  static const bool kDevCPU = false;
51  static const int kDevMask = 1 << 1;
52 };
53 template<int ndim>
54 struct Shape;
55 
62 template<int ndim>
63 inline std::ostream &operator<<(std::ostream &os, const Shape<ndim> &shape); // NOLINT(*)
64 
69 template<int dimension>
70 struct Shape {
72  static const int kDimension = dimension;
74  static const int kSubdim = dimension - 1;
76  index_t shape_[kDimension];
78  MSHADOW_XINLINE Shape(void) {}
80  MSHADOW_XINLINE Shape(const Shape<kDimension> &s) {
81  #pragma unroll
82  for (int i = 0; i < kDimension; ++i) {
83  this->shape_[i] = s[i];
84  }
85  }
91  MSHADOW_XINLINE index_t &operator[](int idx) {
92  return shape_[idx];
93  }
99  MSHADOW_XINLINE const index_t &operator[](int idx) const {
100  return shape_[idx];
101  }
106  MSHADOW_XINLINE bool operator==(const Shape<kDimension> &s) const {
107  #pragma unroll
108  for (int i = 0; i < kDimension; ++i) {
109  if (s.shape_[i] != this->shape_[i]) return false;
110  }
111  return true;
112  }
117  MSHADOW_XINLINE bool operator!=(const Shape<kDimension> &s) const {
118  return !(*this == s);
119  }
124  MSHADOW_XINLINE Shape<1> FlatTo1D(void) const {
125  Shape<1> s;
126  s[0] = this->Size();
127  return s;
128  }
133  MSHADOW_XINLINE Shape<2> FlatTo2D(void) const {
134  Shape<2> s;
135  s.shape_[1] = this->shape_[kDimension - 1];
136  index_t ymax = 1;
137  #pragma unroll
138  for (int i = 0; i < kDimension - 1; ++i) {
139  ymax *= this->shape_[i];
140  }
141  s.shape_[0] = ymax;
142  return s;
143  }
145  MSHADOW_XINLINE index_t Size(void) const {
146  index_t size = this->shape_[0];
147  #pragma unroll
148  for (int i = 1; i < kDimension; ++i) {
149  size *= this->shape_[i];
150  }
151  return size;
152  }
158  MSHADOW_XINLINE index_t ProdShape(int dimstart, int dimend) const {
159  index_t num = 1;
160  #pragma unroll
161  for (int i = dimstart; i < dimend; ++i) {
162  num *= this->shape_[i];
163  }
164  return num;
165  }
170  MSHADOW_XINLINE Shape<kSubdim> SubShape(void) const {
171  Shape<kSubdim> s;
172  // for cuda
173  #pragma unroll
174  for (int i = 0; i < kSubdim; ++i) {
175  s.shape_[i] = this->shape_[i + 1];
176  }
177  return s;
178  }
185  template<int dimstart, int dimend>
186  MSHADOW_XINLINE Shape<dimend - dimstart> Slice(void) const {
187  Shape<dimend - dimstart> s;
188  #pragma unroll
189  for (int i = dimstart; i < dimend; ++i) {
190  s[i - dimstart] = this->shape_[i];
191  }
192  return s;
193  }
195  template<int dim>
196  friend std::ostream &operator<<(std::ostream &os, const Shape<dim> &shape); // NOLINT(*)
198 }; // Shape
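// Usage sketch (editor's addition, not part of the original tensor.h; the helper
// name ExampleShapeUsage_ is hypothetical). It exercises the Shape<ndim> API above.
inline void ExampleShapeUsage_(void) {
  Shape<3> s;
  s[0] = 2; s[1] = 3; s[2] = 4;      // a 2x3x4 shape
  index_t total = s.Size();          // 2 * 3 * 4 = 24 elements
  Shape<2> mat = s.FlatTo2D();       // collapse the leading dims: (6, 4)
  Shape<2> sub = s.SubShape();       // drop the leading dim: (3, 4)
  index_t inner = s.ProdShape(1, 3); // product of dims [1, 3) = 12
  (void)total; (void)mat; (void)sub; (void)inner;
}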
199 //------------------------------------------------
200 // useful construction functions to generate shape
201 //-------------------------------------------------
207 MSHADOW_XINLINE Shape<1> Shape1(index_t s0) {
208  Shape<1> s; s[0] = s0;
209  return s;
210 }
217 MSHADOW_XINLINE Shape<2> Shape2(index_t s0, index_t s1) {
218  Shape<2> s; s[0] = s0; s[1] = s1;
219  return s;
220 }
228 MSHADOW_XINLINE Shape<3> Shape3(index_t s0, index_t s1, index_t s2) {
229  Shape<3> s;
230  s[0] = s0; s[1] = s1; s[2] = s2;
231  return s;
232 }
241 MSHADOW_XINLINE Shape<4> Shape4(index_t s0, index_t s1,
242  index_t s2, index_t s3) {
243  Shape<4> s;
244  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
245  return s;
246 }
256 MSHADOW_XINLINE Shape<5> Shape5(index_t s0, index_t s1, index_t s2,
257  index_t s3, index_t s4) {
258  Shape<5> s;
259  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; s[4] = s4;
260  return s;
261 }
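// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleMakeShapes_ is hypothetical). Shape1..Shape5 are shorthand for filling a Shape<N>.
inline void ExampleMakeShapes_(void) {
  Shape<2> s2 = Shape2(4, 5);           // 4 rows, 5 columns
  Shape<4> s4 = Shape4(16, 3, 32, 32);  // e.g. an NCHW image batch
  (void)s2; (void)s4;
}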
262 
270 inline Shape<3> ConvertLayout(const Shape<3>& src, int src_layout, int dst_layout) {
271  Shape<3> dst;
272  switch (src_layout) {
273  case kNCW:
274  dst = src;
275  break;
276  case kNWC:
277  dst[0] = src[0];
278  dst[1] = src[2];
279  dst[2] = src[1];
280  break;
281  default:
282  LOG(FATAL) << "Invalid layout for 3d shape " << src_layout;
283  }
284  switch (dst_layout) {
285  case kNCW:
286  return dst;
287  case kNWC:
288  {
289  index_t tmp = dst[1];
290  dst[1] = dst[2];
291  dst[2] = tmp;
292  }
293  break;
294  default:
295  LOG(FATAL) << "Invalid layout for 3d shape " << dst_layout;
296  }
297  return dst;
298 }
299 
307 inline Shape<4> ConvertLayout(const Shape<4>& src, int src_layout, int dst_layout) {
308  Shape<4> dst;
309  switch (src_layout) {
310  case kNCHW:
311  dst = src;
312  break;
313  case kNHWC:
314  dst[0] = src[0];
315  dst[2] = src[1];
316  dst[3] = src[2];
317  dst[1] = src[3];
318  break;
319  default:
320  LOG(FATAL) << "Invalid layout for 4d shape " << src_layout;
321  dst = src; // fixes compiler warning
322  }
323  Shape<4> dst2;
324  switch (dst_layout) {
325  case kNCHW:
326  return dst;
327  case kNHWC:
328  dst2[0] = dst[0];
329  dst2[1] = dst[2];
330  dst2[2] = dst[3];
331  dst2[3] = dst[1];
332  break;
333  default:
334  LOG(FATAL) << "Invalid layout for 4d shape " << dst_layout;
335  dst2 = src; // fixes compiler warning
336  }
337  return dst2;
338 }
339 
347 inline Shape<5> ConvertLayout(const Shape<5>& src, int src_layout, int dst_layout) {
348  Shape<5> dst;
349  switch (src_layout) {
350  case kNCDHW:
351  dst = src;
352  break;
353  case kNDHWC:
354  dst[0] = src[0];
355  dst[2] = src[1];
356  dst[3] = src[2];
357  dst[4] = src[3];
358  dst[1] = src[4];
359  break;
360  default:
361  LOG(FATAL) << "Invalid layout for 5d shape " << src_layout;
362  }
363  Shape<5> dst2;
364  switch (dst_layout) {
365  case kNCDHW:
366  return dst;
367  case kNDHWC:
368  dst2[0] = dst[0];
369  dst2[1] = dst[2];
370  dst2[2] = dst[3];
371  dst2[3] = dst[4];
372  dst2[4] = dst[1];
373  break;
374  default:
375  LOG(FATAL) << "Invalid layout for 5d shape " << dst_layout;
376  }
377  return dst2;
378 }
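// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleConvertLayout_ is hypothetical). kNCHW/kNHWC are the layout flags from base.h.
inline void ExampleConvertLayout_(void) {
  Shape<4> nchw = Shape4(16, 3, 32, 32);
  Shape<4> nhwc = ConvertLayout(nchw, kNCHW, kNHWC);  // (16, 32, 32, 3)
  Shape<4> back = ConvertLayout(nhwc, kNHWC, kNCHW);  // (16, 3, 32, 32) again
  (void)back;
}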
379 
383 template<typename Device>
384 struct Stream {
385  // this is only a dummy implementation for CPU
386  // for GPU, the actual implementation will be specialized in tensor_gpu-inl.h
391  inline void Wait(void) {}
396  inline bool CheckIdle(void) {
397  return true;
398  }
400  inline void CreateBlasHandle() {}
401 };
409 template<typename Container, typename Device, int dimension, typename DType>
410 struct TRValue: public expr::RValueExp<Container, DType> {
411 };
412 // more compact template
419 template<typename Device, int dimension,
420  typename DType MSHADOW_DEFAULT_DTYPE>
421 struct Tensor: public TRValue<Tensor<Device, dimension, DType>,
422  Device, dimension, DType> {
423  public:
424  //--------------------------------
425  // struct members
426  //--------------------------------
428  static const bool kDevCPU = Device::kDevCPU;
430  static const int kSubdim = dimension - 1;
431  //--------------------------------
432  // struct members
433  //--------------------------------
435  DType *dptr_ = nullptr;
437  Shape<dimension> shape_;
442  index_t stride_;
447  Stream<Device> *stream_;
448  //--------------------------------
449  // functions
450  //--------------------------------
452  MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
454  MSHADOW_XINLINE Tensor(const Shape<dimension> &shape)
455  : shape_(shape), stream_(NULL) {}
457  MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape)
458  : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(NULL) {}
460  MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape,
461  Stream<Device> *stream)
462  : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(stream) {}
464  MSHADOW_XINLINE Tensor(DType *dptr,
465  const Shape<dimension> &shape,
466  index_t stride, Stream<Device> *stream)
467  : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
472  inline void set_stream(Stream<Device> *stream) {
473  this->stream_ = stream;
474  }
479  template<int startdim>
480  MSHADOW_XINLINE index_t MemSize(void) const {
481  index_t memsz = this->stride_;
482  #pragma unroll
483  for (int i = startdim; i < kSubdim; ++i) {
484  memsz *= this->shape_[i];
485  }
486  return memsz;
487  }
492  MSHADOW_XINLINE bool CheckContiguous(void) const {
493  return this->shape_[dimension - 1] == stride_;
494  }
498  MSHADOW_XINLINE index_t MSize(void) const {
499  return this->MemSize<0>();
500  }
506  MSHADOW_XINLINE index_t size(int idx) const {
507  return shape_[idx];
508  }
513  MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
514  return Tensor<Device, 1, DType>(dptr_, shape_.FlatTo1D(), stride_, stream_);
515  }
520  MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
521  return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
522  }
528  MSHADOW_XINLINE Tensor<Device, kSubdim, DType> operator[](index_t idx) const {
529  return Tensor<Device, kSubdim, DType>(dptr_ + this->MemSize<1>() * idx,
530  shape_.SubShape(), stride_, stream_);
531  }
538  MSHADOW_XINLINE Tensor<Device, dimension, DType>
539  Slice(index_t begin, index_t end) const {
540  Shape<dimension> s = this->shape_;
541  s[0] = end - begin;
542  return Tensor<Device, dimension, DType>(dptr_ + this->MemSize<1>() * begin,
543  s, stride_, stream_);
544  }
546  inline Tensor<Device, dimension, DType> &
547  operator=(const Tensor<Device, dimension, DType> &exp) {
548  dptr_ = exp.dptr_;
549  shape_ = exp.shape_;
550  stride_ = exp.stride_;
551  stream_ = exp.stream_;
552  return *this;
553  }
555  template<typename E, int etype>
556  inline Tensor<Device, dimension, DType> &
557  operator=(const expr::Exp<E, DType, etype> &exp) {
558  return this->__assign(exp);
559  }
561  inline Tensor<Device, dimension, DType> &operator=(const DType &exp) {
562  return this->__assign(exp);
563  }
564 };
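// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleTensorView_ is hypothetical). A Tensor is a non-owning view over an existing
// buffer: `data` must point to at least 2*3*4 floats owned by the caller.
inline void ExampleTensorView_(float *data) {
  Tensor<cpu, 3, float> t(data, Shape3(2, 3, 4));  // 2x3x4 view over data
  Tensor<cpu, 2, float> slab = t[1];               // the second 3x4 matrix
  Tensor<cpu, 3, float> head = t.Slice(0, 1);      // sub-range [0, 1) of dim 0
  bool dense = t.CheckContiguous();                // true when stride_ equals the last dim
  (void)slab; (void)head; (void)dense;
}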
565 /*
566  * specialized class Tensor1D; this is due to the different implementation of operator[]
567  */
568 template<typename Device, typename DType>
569 struct Tensor<Device, 1, DType>:
570  public TRValue<Tensor<Device, 1, DType>, Device, 1, DType> {
571  public:
572  DType *dptr_;
573  Shape<1> shape_;
574  index_t stride_;
575  Stream<Device> *stream_;
576  // constructor
577  MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
578  MSHADOW_XINLINE Tensor(const Shape<1> &shape)
579  : shape_(shape), stream_(NULL) {}
580  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape)
581  : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(NULL) {}
582  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape, Stream<Device> *stream)
583  : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(stream) {}
584  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape,
585  index_t stride, Stream<Device> *stream)
586  : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
587  inline void set_stream(Stream<Device> *stream) {
588  this->stream_ = stream;
589  }
590  MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
591  return *this;
592  }
593  MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
594  return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
595  }
596  MSHADOW_XINLINE Tensor<Device, 1, DType> Slice(index_t begin, index_t end) const {
597  Shape<1> s;
598  s[0] = end - begin;
599  return Tensor<Device, 1, DType>(dptr_ + begin, s, s[0], stream_);
600  }
601  MSHADOW_XINLINE bool CheckContiguous(void) const {
602  return true;
603  }
604  MSHADOW_XINLINE index_t MSize(void) const {
605  return shape_[0];
606  }
607  MSHADOW_XINLINE index_t size(index_t i) const {
608  return shape_[0];
609  }
610  MSHADOW_XINLINE DType &operator[](index_t idx) {
611  return dptr_[idx];
612  }
613  MSHADOW_XINLINE const DType &operator[](index_t idx) const {
614  return dptr_[idx];
615  }
617  inline Tensor<Device, 1, DType> &
618  operator=(const Tensor<Device, 1, DType> &exp) {
619  dptr_ = exp.dptr_;
620  shape_ = exp.shape_;
621  stride_ = exp.stride_;
622  stream_ = exp.stream_;
623  return *this;
624  }
625  template<typename E, int etype>
626  inline Tensor<Device, 1, DType> &
627  operator=(const expr::Exp<E, DType, etype> &exp) {
628  return this->__assign(exp);
629  }
630  inline Tensor<Device, 1, DType> &operator=(const DType &exp) {
631  return this->__assign(exp);
632  }
633 };
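// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleVectorSum_ is hypothetical). The 1-D specialization returns scalar references
// from operator[], unlike the general case which returns a lower-dimensional tensor.
inline float ExampleVectorSum_(float *data, index_t n) {
  Tensor<cpu, 1, float> vec(data, Shape1(n));
  float sum = 0.0f;
  for (index_t i = 0; i < vec.size(0); ++i) {
    sum += vec[i];
  }
  return sum;
}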
634 //------------------------
635 // Function Declarations
636 //-----------------------
644 template<typename Device>
645 inline void InitTensorEngine(int device_id = 0);
652 template<typename Device>
653 inline void ShutdownTensorEngine(void);
659 template<typename Device>
660 inline void SetDevice(int devid);
669 template<typename Device>
670 inline Stream<Device> *NewStream(bool create_blas_handle,
671  bool create_dnn_handle,
672  int dev_id = -1);
677 template<typename Device>
678 inline Stream<Device> *NewStream(int dev_id) {
679  return NewStream<Device>(true, false, dev_id);
680 }
685 template<typename Device>
686 inline void DeleteStream(Stream<Device> *stream);
698 template<int dim, typename DType>
699 inline void AllocSpace(Tensor<cpu, dim, DType> *obj,
700  bool pad = MSHADOW_ALLOC_PAD);
712 template<int dim, typename DType>
713 inline void AllocSpace(Tensor<gpu, dim, DType> *obj,
714  bool pad = MSHADOW_ALLOC_PAD);
721 template<int dim, typename DType>
722 inline void FreeSpace(Tensor<cpu, dim, DType> *obj);
729 template<int dim, typename DType>
730 inline void FreeSpace(Tensor<gpu, dim, DType> *obj);
743 template<typename Device, typename DType, int dim>
744 inline Tensor<Device, dim, DType> NewTensor(const Shape<dim> &shape,
745  DType initv,
746  bool pad = MSHADOW_ALLOC_PAD,
747  Stream<Device> *stream = NULL);
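// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleOwnedTensor_ is hypothetical). mshadow never allocates implicitly: AllocSpace /
// FreeSpace (or the NewTensor shortcut) must be paired explicitly by the caller.
template<typename DType>
inline void ExampleOwnedTensor_(void) {
  Tensor<cpu, 2, DType> t(Shape2(4, 8));
  AllocSpace(&t);     // allocates t.dptr_, padded by default
  t = DType(0);       // expression-template assignment: set every element to zero
  FreeSpace(&t);      // releases the buffer and resets dptr_
}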
756 template<int dim, typename DType>
757 inline void Copy(Tensor<cpu, dim, DType> dst,
758  const Tensor<cpu, dim, DType> &src,
759  Stream<cpu> *stream = NULL);
768 template<int dim, typename DType>
769 inline void Copy(Tensor<cpu, dim, DType> dst,
770  const Tensor<gpu, dim, DType> &src,
771  Stream<gpu> *stream = NULL);
780 template<int dim, typename DType>
781 inline void Copy(Tensor<gpu, dim, DType> dst,
782  const Tensor<cpu, dim, DType> &src,
783  Stream<gpu> *stream = NULL);
792 template<int dim, typename DType>
793 inline void Copy(Tensor<gpu, dim, DType> dst,
794  const Tensor<gpu, dim, DType> &src,
795  Stream<gpu> *stream = NULL);
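// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleCopy_ is hypothetical). Copy assumes dst is already allocated with the same
// shape as src; the gpu overloads additionally take a Stream<gpu>*.
template<typename DType>
inline void ExampleCopy_(Tensor<cpu, 2, DType> dst, const Tensor<cpu, 2, DType> &src) {
  Copy(dst, src);
}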
801 template<typename DType>
802 inline void Softmax(Tensor<cpu, 2, DType> dst, const Tensor<cpu, 2, DType> &energy);
808 template<typename DType>
809 inline void Softmax(Tensor<gpu, 2, DType> dst, const Tensor<gpu, 2, DType> &energy);
810 
817 template<typename DType>
818 inline void SoftmaxGrad(Tensor<cpu, 2, DType> dst,
819  const Tensor<cpu, 2, DType> &src,
820  const Tensor<cpu, 1, DType> &label);
827 template<typename DType>
828 inline void SoftmaxGrad(const Tensor<gpu, 2, DType> &dst,
829  const Tensor<gpu, 2, DType> &src,
830  const Tensor<gpu, 1, DType> &label);
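// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleSoftmax_ is hypothetical). All tensors are pre-allocated by the caller.
template<typename DType>
inline void ExampleSoftmax_(Tensor<cpu, 2, DType> prob,
                            const Tensor<cpu, 2, DType> &score,
                            const Tensor<cpu, 1, DType> &label,
                            Tensor<cpu, 2, DType> grad) {
  Softmax(prob, score);            // prob[i][j] = exp(score[i][j]) / sum_j exp(score[i][j])
  SoftmaxGrad(grad, prob, label);  // gradient w.r.t. the scores for the given labels
}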
839 template<bool clip = true, typename IndexType, typename DType>
840 inline void AddTakeGrad(Tensor<cpu, 2, DType> dst,
841  const Tensor<cpu, 1, IndexType>& index,
842  const Tensor<cpu, 2, DType> &src);
851 template<bool clip = true, typename IndexType, typename DType>
852 inline void AddTakeGrad(Tensor<gpu, 2, DType> dst,
853  const Tensor<gpu, 1, IndexType>& index,
854  const Tensor<gpu, 2, DType> &src);
864 template<typename IndexType, typename DType>
865 inline void AddTakeGradLargeBatch(Tensor<cpu, 2, DType> dst,
866  const Tensor<cpu, 1, IndexType>& sorted,
867  const Tensor<cpu, 1, IndexType>& index,
868  const Tensor<cpu, 2, DType> &src);
878 template<typename IndexType, typename DType>
879 inline void AddTakeGradLargeBatch(Tensor<gpu, 2, DType> dst,
880  const Tensor<gpu, 1, IndexType>& sorted,
881  const Tensor<gpu, 1, IndexType>& index,
882  const Tensor<gpu, 2, DType> &src);
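// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleEmbeddingGrad_ is hypothetical). AddTakeGrad accumulates
// grad_weight[index[i]] += grad_out[i] for every row i, clipping indices by default.
template<typename IndexType, typename DType>
inline void ExampleEmbeddingGrad_(Tensor<cpu, 2, DType> grad_weight,
                                  const Tensor<cpu, 1, IndexType> &index,
                                  const Tensor<cpu, 2, DType> &grad_out) {
  AddTakeGrad(grad_weight, index, grad_out);
}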
891 template<typename IndexType, typename DType>
892 inline void IndexFill(Tensor<cpu, 2, DType> dst,
893  const Tensor<cpu, 1, IndexType>& index,
894  const Tensor<cpu, 2, DType> &src);
903 template<typename IndexType, typename DType>
904 inline void IndexFill(Tensor<gpu, 2, DType> dst,
905  const Tensor<gpu, 1, IndexType>& index,
906  const Tensor<gpu, 2, DType> &src);
913 template<typename KDType, typename VDType>
914 inline void SortByKey(Tensor<cpu, 1, KDType> keys, Tensor<cpu, 1, VDType> values,
915  bool is_ascend = true);
922 template<typename KDType, typename VDType>
923 inline void SortByKey(Tensor<gpu, 1, KDType> keys, Tensor<gpu, 1, VDType> values,
924  bool is_ascend = true);
933 template<typename Device, typename VDType, typename SDType>
934 inline void VectorizedSort(Tensor<Device, 1, VDType> values, Tensor<Device, 1, SDType> segments);
935 
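// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleSortByKey_ is hypothetical). Keys and values are parallel 1-D tensors; the
// sort is stable and ascending by default.
template<typename KDType, typename VDType>
inline void ExampleSortByKey_(Tensor<cpu, 1, KDType> keys, Tensor<cpu, 1, VDType> values) {
  SortByKey(keys, values, /*is_ascend=*/true);
}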
936 // function declarations to support expression templates; no need to understand them,
937 // these functions do not need to be used directly
950 template<typename Saver, typename R, int dim,
951  typename DType, typename E, int etype>
952 inline void MapExp(TRValue<R, cpu, dim, DType> *dst,
953  const expr::Exp<E, DType, etype> &exp);
966 template<typename Saver, typename R, int dim,
967  typename DType, typename E, int etype>
968 inline void MapExp(TRValue<R, gpu, dim, DType> *dst,
969  const expr::Exp<E, DType, etype> &exp);
983 template<typename Saver, typename Reducer,
984  typename R, typename DType, typename E, int etype>
985 inline void MapReduceKeepLowest(TRValue<R, cpu, 1, DType> *dst,
986  const expr::Exp<E, DType, etype> &exp,
987  DType scale = 1);
1001 template<typename Saver, typename Reducer, typename R,
1002  typename DType, typename E, int etype>
1003 inline void MapReduceKeepLowest(TRValue<R, gpu, 1, DType> *dst,
1004  const expr::Exp<E, DType, etype> &exp,
1005  DType scale = 1);
1020 template<typename Saver, typename Reducer, int dimkeep,
1021  typename R, typename DType, typename E, int etype>
1022 inline void MapReduceKeepHighDim(TRValue<R, cpu, 1, DType> *dst,
1023  const expr::Exp<E, DType, etype> &exp,
1024  DType scale = 1);
1039 template<typename Saver, typename Reducer, int dimkeep,
1040  typename R, typename DType, typename E, int etype>
1041 inline void MapReduceKeepHighDim(TRValue<R, gpu, 1, DType> *dst,
1042  const expr::Exp<E, DType, etype> &exp,
1043  DType scale = 1);
1050 template<typename Device, typename DType>
1051 inline void VectorDot(Tensor<Device, 1, DType> dst,
1052  const Tensor<Device, 1, DType> &lhs,
1053  const Tensor<Device, 1, DType> &rhs);
1063 template<bool transpose_left, bool transpose_right, typename Device, typename DType>
1064 inline void BatchGEMM(Tensor<Device, 3, DType> dst,
1065  const Tensor<Device, 3, DType> &lhs,
1066  const Tensor<Device, 3, DType> &rhs,
1067  DType alpha,
1068  DType beta,
1069  Tensor<Device, 1, DType*> workspace);
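// Usage sketch (editor's addition, not part of the original tensor.h; the helper name
// ExampleVectorDot_ is hypothetical). dst is a 1-element tensor receiving the dot
// product; BatchGEMM follows the same call style but operates on batches of matrices.
template<typename DType>
inline void ExampleVectorDot_(Tensor<cpu, 1, DType> dst,
                              const Tensor<cpu, 1, DType> &lhs,
                              const Tensor<cpu, 1, DType> &rhs) {
  VectorDot(dst, lhs, rhs);
}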
1070 } // namespace mshadow
1071 // include headers
1072 #include "./stream_gpu-inl.h"
1073 #include "./extension.h"
1074 #include "./expr_engine-inl.h"
1075 #include "./tensor_cpu-inl.h"
1076 #include "./tensor_gpu-inl.h"
1077 #include "./io.h"
1078 #include "./tensor_container.h"
1079 #include "./random.h"
1080 // add definition of scalar related operators
1081 #ifdef MSHADOW_SCALAR_
1082  #error "MSHADOW_SCALAR_ must not be defined"
1083 #endif
1084 // enumerate all the scalar data type we aim to be good at
1085 #define MSHADOW_SCALAR_ float
1086 #include "./expr_scalar-inl.h"
1087 #undef MSHADOW_SCALAR_
1088 #define MSHADOW_SCALAR_ double
1089 #include "./expr_scalar-inl.h"
1090 #undef MSHADOW_SCALAR_
1091 #define MSHADOW_SCALAR_ int32_t
1092 #include "./expr_scalar-inl.h"
1093 #undef MSHADOW_SCALAR_
1094 #define MSHADOW_SCALAR_ int64_t
1095 #include "./expr_scalar-inl.h"
1096 #undef MSHADOW_SCALAR_
1097 #define MSHADOW_SCALAR_ mshadow::half::half_t
1098 #include "./expr_scalar-inl.h"
1099 #undef MSHADOW_SCALAR_
1100 #endif // MSHADOW_TENSOR_H_