7 #ifndef MSHADOW_TENSOR_GPU_INL_H_ 8 #define MSHADOW_TENSOR_GPU_INL_H_ 19 cudaGetDeviceCount(&device_count);
20 CHECK_GT(device_count, 0) <<
"Cannot find CUDA device. Please check CUDA-Configuration";
26 CHECK_LT(device_id, device_count) <<
"Incorrect Device ID";
37 template<
int dim,
typename DType>
43 obj->
size(dim - 1) *
sizeof(DType),
44 obj->
shape_.FlatTo2D()[0]));
49 obj->
shape_.Size() *
sizeof(DType), 1));
52 template<
int dim,
typename DType>
57 template<
typename A,
typename B,
int dim,
typename DType>
62 CHECK_EQ(_dst.
shape_, _src.
shape_) <<
"Copy:shape mismatch";
67 dst.
size(1) *
sizeof(DType),
75 template<
int dim,
typename DType>
79 Copy(dst, src, cudaMemcpyDeviceToHost, stream);
81 template<
int dim,
typename DType>
85 Copy(dst, src, cudaMemcpyDeviceToDevice, stream);
87 template<
int dim,
typename DType>
91 Copy(dst, src, cudaMemcpyHostToDevice, stream);
93 #endif // MSHADOW_USE_CUDA 98 #include "./cuda/tensor_gpu-inl.cuh" 101 template<
typename Saver,
typename R,
int dim,
102 typename DType,
typename E,
int etype>
106 ::Error_All_Tensor_in_Exp_Must_Have_Same_Type();
109 CHECK(eshape[0] == 0 || eshape == dshape)
110 <<
"Assignment: Shape of Tensors are not consistent with target, " 111 <<
"eshape: " << eshape <<
" dshape:" << dshape;
118 template<
typename Saver,
typename Reducer,
119 typename R,
typename DType,
typename E,
int etype>
124 ::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
126 ::Check(exp.
self()).FlatTo2D();
128 CHECK_EQ(eshape[1], dshape[0]) <<
"MapReduceKeepLowest::reduction dimension do not match";
129 CHECK_NE(eshape[0], 0U) <<
"can not reduce over empty tensor";
130 cuda::MapReduceKeepLowest<Saver, Reducer>
135 template<
typename Saver,
typename Reducer,
int dimkeep,
136 typename R,
typename DType,
typename E,
int etype>
141 ::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
146 CHECK_EQ(eshape[dimkeep], dshape[0]) <<
"MapReduceKeepHighDim::reduction dimension do not match";
150 eshape.ProdShape(dimkeep + 1, EShape::kSubdim),
151 eshape[EShape::kSubdim]);
153 cuda::MapReduceKeepDim1<Saver, Reducer>
157 template<
typename DType>
163 template<
typename DType>
169 template<
typename DType>
176 template<
typename DType>
184 template<
typename DType>
188 const DType &ignore_label) {
192 template<
typename DType>
196 const DType &ignore_label,
201 template<
typename DType>
208 template<
typename DType>
212 const DType &ignore_label) {
216 template<
bool clip,
typename IndexType,
typename DType>
220 cuda::AddTakeGrad<clip, IndexType, DType>(dst, index, src);
223 template<
typename IndexType,
typename DType>
231 template<
typename KDType,
typename VDType>
237 template<
typename IndexType,
typename DType>
245 #endif // MSHADOW_TENSOR_GPU_INL_H_ void FreeSpace(Tensor< cpu, dim, DType > *obj)
CPU/GPU: free the space of tensor, will set obj.dptr to NULL.
Definition: tensor_cpu-inl.h:122
void IndexFill(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src)
CPU/GPU: Fill the values of the destination matrix to specific rows in the source matrix...
Definition: tensor_cpu-inl.h:526
void SoftmaxGrad(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label)
CPU/GPU: softmax gradient.
Definition: tensor_cpu-inl.h:288
void SmoothSoftmaxGrad(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label, const float alpha)
Definition: tensor_cpu-inl.h:305
PaddingExp< SrcExp, DType, ExpInfo< SrcExp >::kDim > pad(const Exp< SrcExp, DType, etype > &src, index_t pad)
padding expression: pads an image with zeros on its boundaries; padding affects shape[0] and shape[1]
Definition: pad.h:53
DType * dptr_
pointer to the data
Definition: tensor.h:416
Tensor RValue, this is the super type of all kinds of possible tensors.
Definition: tensor.h:391
used to help static type check
Definition: expr_engine-inl.h:312
void Copy(Tensor< cpu, dim, DType > dst, const Tensor< cpu, dim, DType > &src, Stream< cpu > *stream=NULL)
copy data from one tensor to another, with same shape
Definition: tensor_cpu-inl.h:127
void MapExp(TRValue< R, cpu, dim, DType > *dst, const expr::Exp< E, DType, etype > &exp)
CPU/GPU: map an expression to a tensor; this function calls MapPlan.
Definition: tensor_cpu-inl.h:189
Definition: stream_gpu-inl.h:19
Shape< dimension > shape_
shape of the tensor
Definition: tensor.h:418
MSHADOW_XINLINE Shape< 4 > Shape4(index_t s0, index_t s1, index_t s2, index_t s3)
construct a four dimension shape, stride will equal s0
Definition: tensor.h:222
void SortByKey(Tensor< cpu, 1, KDType > keys, Tensor< cpu, 1, VDType > values, bool is_ascend=true)
CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!) ...
Definition: tensor_cpu-inl.h:537
void Softmax(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &energy)
CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j])) ...
Definition: tensor_cpu-inl.h:465
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:252
void MapReduceKeepLowest(TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1)
CPU/GPU: map an expression, do reduction to 1D Tensor in lowest dimension (dimension 0) ...
Definition: tensor_cpu-inl.h:205
static Shape< dim > Check(const E &t)
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:501
Definition: expr_engine-inl.h:327
int32_t index_t
type that will be used for index
Definition: base.h:291
void AllocSpace(Tensor< cpu, dim, DType > *obj, bool pad=MSHADOW_ALLOC_PAD)
CPU/CPU: allocate space for CTensor, according to the shape in the obj this function is responsible t...
Definition: tensor_cpu-inl.h:98
void ShutdownTensorEngine< gpu >(void)
Definition: tensor_gpu-inl.h:31
void AddTakeGradLargeBatch(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &sorted, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src)
CPU/GPU: Gradient accumulate of embedding matrix. dst[sorted[i]] += src[index[i]] Called when the bat...
Definition: tensor_cpu-inl.h:516
runtime shape checking template: get the shape of an expression, report an error if shapes mismatch ...
Definition: expr_engine-inl.h:346
void InitTensorEngine< gpu >(int dev_id)
Definition: tensor_gpu-inl.h:15
void MapReduceKeepHighDim(TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1)
CPU/GPU: map an expression, do reduction to 1D Tensor in third dimension (dimension 2) ...
Definition: tensor_cpu-inl.h:232
defines how expression exp can be evaluated and stored into dst
Definition: expression.h:61
const SubType & self(void) const
Definition: expression.h:64
Plan< BinaryMapExp< OP, TA, TB, DType, etype >, DType > MakePlan(const BinaryMapExp< OP, TA, TB, DType, etype > &e)
Definition: expr_engine-inl.h:221
void SetDevice< gpu >(int devid)
Definition: tensor_gpu-inl.h:34
void AddTakeGrad(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src)
CPU/GPU: Gradient accumulate of embedding matrix. dst[index[i]] += src[i] Called when the featuredim ...
Definition: tensor_cpu-inl.h:498
namespace for mshadow
Definition: base.h:282
MSHADOW_XINLINE index_t size(int idx) const
return size of i-th dimension, start counting from highest dimension
Definition: tensor.h:487
index_t stride_
stores the stride information in the x dimension; this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:423
general tensor
Definition: tensor.h:402
#define MSHADOW_MIN_PAD_RATIO
x dimension of data must be bigger than pad_size * ratio to be allocated padded memory; otherwise use tide a...
Definition: base.h:65
computation stream structure, used for asynchronous computations
Definition: tensor.h:365