25 #ifndef MSHADOW_TENSOR_GPU_INL_H_
26 #define MSHADOW_TENSOR_GPU_INL_H_
/*!
 * \brief initialize the GPU tensor engine for a device
 * \param dev_id requested CUDA device id; a negative value selects device 0
 *
 * Aborts via CHECK if no CUDA device is found or the id is out of range.
 * NOTE(review): reconstructed from a garbled extraction — only the two CHECKs
 * were visible; surrounding statements follow upstream mshadow. Confirm
 * against the canonical tensor_gpu-inl.h.
 */
template<>
inline void InitTensorEngine<gpu>(int dev_id) {
  int device_id = 0;
  int device_count = 0;
  cudaGetDeviceCount(&device_count);
  CHECK_GT(device_count, 0) << "Cannot find CUDA device. Please check CUDA-Configuration";
  if (dev_id >= 0) {
    device_id = dev_id;
  }
  CHECK_LT(device_id, device_count) << "Incorrect Device ID";
  MSHADOW_CUDA_CALL(cudaSetDevice(device_id));
}
/*!
 * \brief allocate GPU memory for a tensor
 * \param obj tensor to allocate; obj->dptr_ and obj->stride_ are set on return
 * \param pad if true (and the row is wide enough to benefit), use a pitched
 *        allocation so each row is aligned; stride_ then reflects the pitch.
 *        Otherwise allocate one contiguous block with stride_ == innermost size.
 */
template<int dim, typename DType>
inline void AllocSpace(Tensor<gpu, dim, DType> *obj, bool pad) {
  size_t pitch;
  // common choice of CUDA memory alignment unit is 32
  if (pad && obj->size(dim - 1) >= MSHADOW_MIN_PAD_RATIO * 32) {
    // pitched allocation: one aligned row per outer index
    MSHADOW_CUDA_CALL(cudaMallocPitch
                      (reinterpret_cast<void**>(&(obj->dptr_)), &pitch,
                       obj->size(dim - 1) * sizeof(DType),
                       obj->shape_.FlatTo2D()[0]));
    obj->stride_ = static_cast<index_t>(pitch / sizeof(DType));
  } else {
    obj->stride_ = obj->size(dim - 1);
    // contiguous allocation: a single "row" spanning the whole tensor
    MSHADOW_CUDA_CALL(cudaMallocPitch
                      (reinterpret_cast<void**>(&(obj->dptr_)), &pitch,
                       obj->shape_.Size() * sizeof(DType), 1));
  }
}
/*!
 * \brief free the GPU memory held by a tensor and null its data pointer
 * \param obj tensor whose storage is released; obj->dptr_ is reset to NULL
 */
template<int dim, typename DType>
inline void FreeSpace(Tensor<gpu, dim, DType> *obj) {
  MSHADOW_CUDA_CALL(cudaFree(obj->dptr_));
  obj->dptr_ = NULL;
}
/*!
 * \brief generic tensor copy across memory spaces, implemented as a strided
 *        2D async copy over the flattened tensors
 * \param _dst destination tensor; shape must equal _src's
 * \param _src source tensor
 * \param kind cudaMemcpy direction (HostToDevice / DeviceToHost / DeviceToDevice)
 * \param stream stream to enqueue the copy on; NULL means the default stream,
 *        in which case the call blocks until the copy completes
 */
template<typename A, typename B, int dim, typename DType>
inline void Copy(Tensor<A, dim, DType> _dst,
                 Tensor<B, dim, DType> _src,
                 cudaMemcpyKind kind,
                 Stream<gpu> *stream) {
  CHECK_EQ(_dst.shape_, _src.shape_) << "Copy:shape mismatch";
  // flatten to 2D so one pitched copy handles any rank and padded strides
  Tensor<A, 2, DType> dst = _dst.FlatTo2D();
  Tensor<B, 2, DType> src = _src.FlatTo2D();
  MSHADOW_CUDA_CALL(cudaMemcpy2DAsync(dst.dptr_, dst.stride_ * sizeof(DType),
                                      src.dptr_, src.stride_ * sizeof(DType),
                                      dst.size(1) * sizeof(DType),
                                      dst.size(0), kind,
                                      Stream<gpu>::GetStream(stream)));
  // keep synchronous semantics when no explicit stream was supplied
  if (stream == NULL) {
    MSHADOW_CUDA_CALL(cudaStreamSynchronize(0));
  }
}
/*!
 * \brief copy a GPU tensor to a CPU tensor (device-to-host)
 * \param dst destination CPU tensor
 * \param src source GPU tensor, same shape as dst
 * \param stream stream to run the copy on; NULL blocks until done
 */
template<int dim, typename DType>
inline void Copy(Tensor<cpu, dim, DType> dst,
                 const Tensor<gpu, dim, DType> &src,
                 Stream<gpu> *stream) {
  Copy(dst, src, cudaMemcpyDeviceToHost, stream);
}
/*!
 * \brief copy between two GPU tensors (device-to-device)
 * \param dst destination GPU tensor
 * \param src source GPU tensor, same shape as dst
 * \param stream stream to run the copy on; NULL blocks until done
 */
template<int dim, typename DType>
inline void Copy(Tensor<gpu, dim, DType> dst,
                 const Tensor<gpu, dim, DType> &src,
                 Stream<gpu> *stream) {
  Copy(dst, src, cudaMemcpyDeviceToDevice, stream);
}
/*!
 * \brief copy a CPU tensor to a GPU tensor (host-to-device)
 * \param dst destination GPU tensor
 * \param src source CPU tensor, same shape as dst
 * \param stream stream to run the copy on; NULL blocks until done
 */
template<int dim, typename DType>
inline void Copy(Tensor<gpu, dim, DType> dst,
                 const Tensor<cpu, dim, DType> &src,
                 Stream<gpu> *stream) {
  Copy(dst, src, cudaMemcpyHostToDevice, stream);
}
111 #endif // MSHADOW_USE_CUDA
116 #include "./cuda/tensor_gpu-inl.cuh"
/*!
 * \brief execute an expression element-wise on the GPU, storing via Saver
 *        (e.g. assign/plus-assign) into dst
 * \tparam Saver save semantics applied per element
 * \param dst destination rvalue tensor
 * \param exp expression to evaluate; its shape must match dst (an eshape of
 *        0 in dim 0 means the expression imposes no shape constraint)
 */
template<typename Saver, typename R, int dim,
         typename DType, typename E, int etype>
inline void MapExp(TRValue<R, gpu, dim, DType> *dst,
                   const expr::Exp<E, DType, etype> &exp) {
  // compile-time check: all tensors in exp live on gpu with matching DType
  expr::TypeCheckPass<expr::TypeCheck<gpu, dim, DType, E>::kMapPass>
      ::Error_All_Tensor_in_Exp_Must_Have_Same_Type();
  Shape<dim> eshape = expr::ShapeCheck<dim, E>::Check(exp.self());
  Shape<dim> dshape = expr::ShapeCheck<dim, R>::Check(dst->self());
  CHECK(eshape[0] == 0 || eshape == dshape)
      << "Assignment: Shape of Tensors are not consistent with target, "
      << "eshape: " << eshape << " dshape:" << dshape;
  // launch on the stream attached to dst
  cuda::MapPlan<Saver>(MakePlan(dst->self()),
                       MakePlan(exp.self()),
                       dshape.FlatTo2D(),
                       Stream<gpu>::GetStream(expr::StreamInfo<gpu, R>::Get(dst->self())));
}
/*!
 * \brief reduce an expression over all dimensions except the lowest,
 *        storing the scaled result via Saver into a 1-D destination
 * \tparam Saver save semantics; \tparam Reducer reduction op (e.g. sum)
 * \param dst 1-D destination whose length equals the kept (lowest) dimension
 * \param exp expression to reduce, flattened to 2D as (rest, lowest)
 * \param scale multiplier applied to the reduced value
 */
template<typename Saver, typename Reducer,
         typename R, typename DType, typename E, int etype>
inline void MapReduceKeepLowest(TRValue<R, gpu, 1, DType> *dst,
                                const expr::Exp<E, DType, etype> &exp,
                                DType scale) {
  expr::TypeCheckPass<expr::TypeCheck<gpu, 1, DType, E>::kRedPass>
      ::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
  Shape<2> eshape = expr::ShapeCheck<expr::ExpInfo<E>::kDim, E>
      ::Check(exp.self()).FlatTo2D();
  Shape<1> dshape = expr::ShapeCheck<1, R>::Check(dst->self());
  CHECK_EQ(eshape[1], dshape[0]) << "MapReduceKeepLowest::reduction dimension do not match";
  CHECK_NE(eshape[0], 0U) << "can not reduce over empty tensor";
  cuda::MapReduceKeepLowest<Saver, Reducer>
      (MakePlan(dst->self()), MakePlan(exp.self()), scale, eshape,
       Stream<gpu>::GetStream(expr::StreamInfo<gpu, R>::Get(dst->self())));
}
/*!
 * \brief reduce an expression over all dimensions except dimkeep,
 *        storing the scaled result via Saver into a 1-D destination
 * \tparam dimkeep the dimension index preserved by the reduction
 * \param dst 1-D destination whose length equals eshape[dimkeep]
 * \param exp expression to reduce
 * \param scale multiplier applied to the reduced value
 */
template<typename Saver, typename Reducer, int dimkeep,
         typename R, typename DType, typename E, int etype>
inline void MapReduceKeepHighDim(TRValue<R, gpu, 1, DType> *dst,
                                 const expr::Exp<E, DType, etype> &exp,
                                 DType scale) {
  expr::TypeCheckPass<expr::TypeCheck<gpu, dimkeep, DType, E>::kRedPass>
      ::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
  typedef Shape<expr::ExpInfo<E>::kDim> EShape;
  EShape eshape = expr::ShapeCheck<expr::ExpInfo<E>::kDim, E>
      ::Check(exp.self());
  Shape<1> dshape = expr::ShapeCheck<1, R>::Check(dst->self());
  CHECK_EQ(eshape[dimkeep], dshape[0]) << "MapReduceKeepHighDim::reduction dimension do not match";
  // reshape to the equivalent 4-D form (before, keep, between, lowest)
  // so the dim-1-keeping kernel can be reused
  Shape<4> pshape = Shape4(eshape.ProdShape(0, dimkeep),
                           eshape[dimkeep],
                           eshape.ProdShape(dimkeep + 1, EShape::kSubdim),
                           eshape[EShape::kSubdim]);
  cuda::MapReduceKeepDim1<Saver, Reducer>
      (MakePlan(dst->self()), MakePlan(exp.self()), scale, pshape,
       Stream<gpu>::GetStream(expr::StreamInfo<gpu, R>::Get(dst->self())));
}
/*!
 * \brief row-wise softmax on a 2-D tensor, delegating to the CUDA kernel
 * \param dst output tensor, same shape as src
 * \param src input logits
 */
template<typename DType>
inline void Softmax(Tensor<gpu, 2, DType> dst,
                    const Tensor<gpu, 2, DType>& src) {
  cuda::Softmax(dst, src);
}
/*!
 * \brief softmax on a 3-D tensor, delegating to the CUDA kernel
 * \param dst output tensor, same shape as src
 * \param src input logits
 */
template<typename DType>
inline void Softmax(Tensor<gpu, 3, DType> dst,
                    const Tensor<gpu, 3, DType>& src) {
  cuda::Softmax(dst, src);
}
/*!
 * \brief gradient of 2-D softmax w.r.t. its input, given integer class labels
 * \param dst output gradient, (batch, num_class)
 * \param src softmax output, (batch, num_class)
 * \param label per-row target class, (batch,)
 */
template<typename DType>
inline void SoftmaxGrad(const Tensor<gpu, 2, DType> &dst,
                        const Tensor<gpu, 2, DType> &src,
                        const Tensor<gpu, 1, DType> &label) {
  cuda::SoftmaxGrad(dst, src, label);
}
/*!
 * \brief label-smoothed softmax gradient (2-D), delegating to the CUDA kernel
 * \param dst output gradient, (batch, num_class)
 * \param src softmax output, (batch, num_class)
 * \param label per-row target class, (batch,)
 * \param alpha label-smoothing coefficient
 * NOTE(review): the function name and alpha parameter were missing from the
 * garbled extraction; reconstructed from the trailing comma after `label` and
 * upstream mshadow — confirm against the canonical source.
 */
template<typename DType>
inline void SmoothSoftmaxGrad(const Tensor<gpu, 2, DType> &dst,
                              const Tensor<gpu, 2, DType> &src,
                              const Tensor<gpu, 1, DType> &label,
                              const float alpha) {
  cuda::SmoothSoftmaxGrad(dst, src, label, alpha);
}
/*!
 * \brief gradient of 2-D softmax with an ignored label value
 * \param dst output gradient, (batch, num_class)
 * \param src softmax output, (batch, num_class)
 * \param label per-row target class, (batch,)
 * \param ignore_label rows whose label equals this value contribute no gradient
 */
template<typename DType>
inline void SoftmaxGrad(const Tensor<gpu, 2, DType> &dst,
                        const Tensor<gpu, 2, DType> &src,
                        const Tensor<gpu, 1, DType> &label,
                        const DType &ignore_label) {
  cuda::SoftmaxGrad(dst, src, label, ignore_label);
}
/*!
 * \brief label-smoothed softmax gradient (2-D) with an ignored label value
 * \param dst output gradient, (batch, num_class)
 * \param src softmax output, (batch, num_class)
 * \param label per-row target class, (batch,)
 * \param ignore_label rows whose label equals this value contribute no gradient
 * \param alpha label-smoothing coefficient
 * NOTE(review): name and alpha parameter reconstructed (trailing comma after
 * `ignore_label` in the garbled extraction implies a further parameter);
 * confirm against the canonical mshadow source.
 */
template<typename DType>
inline void SmoothSoftmaxGrad(const Tensor<gpu, 2, DType> &dst,
                              const Tensor<gpu, 2, DType> &src,
                              const Tensor<gpu, 1, DType> &label,
                              const DType &ignore_label,
                              const float alpha) {
  cuda::SmoothSoftmaxGrad(dst, src, label, ignore_label, alpha);
}
/*!
 * \brief gradient of 3-D softmax given a 2-D label tensor
 * \param dst output gradient, same shape as src
 * \param src softmax output
 * \param label 2-D target classes
 */
template<typename DType>
inline void SoftmaxGrad(const Tensor<gpu, 3, DType> &dst,
                        const Tensor<gpu, 3, DType> &src,
                        const Tensor<gpu, 2, DType> &label) {
  cuda::SoftmaxGrad(dst, src, label);
}
/*!
 * \brief gradient of 3-D softmax with an ignored label value
 * \param dst output gradient, same shape as src
 * \param src softmax output
 * \param label 2-D target classes
 * \param ignore_label entries whose label equals this value contribute no gradient
 */
template<typename DType>
inline void SoftmaxGrad(const Tensor<gpu, 3, DType> &dst,
                        const Tensor<gpu, 3, DType> &src,
                        const Tensor<gpu, 2, DType> &label,
                        const DType &ignore_label) {
  cuda::SoftmaxGrad(dst, src, label, ignore_label);
}
/*!
 * \brief accumulate rows of src into rows of dst selected by index
 *        (gradient of an embedding/take operation)
 * \tparam clip if true, out-of-range indices are clipped rather than wrapped
 * \param dst gradient buffer being accumulated into, (vocab, dim)
 * \param index row indices, (batch,)
 * \param src incoming gradients, (batch, dim)
 */
template<bool clip, typename IndexType, typename DType>
inline void AddTakeGrad(Tensor<gpu, 2, DType> dst,
                        const Tensor<gpu, 1, IndexType>& index,
                        const Tensor<gpu, 2, DType> &src) {
  cuda::AddTakeGrad<clip, IndexType, DType>(dst, index, src);
}
/*!
 * \brief AddTakeGrad variant with a separate accumulation buffer
 * \tparam clip if true, out-of-range indices are clipped rather than wrapped
 * \tparam AType element type of the temporary accumulator
 * \param dst gradient buffer being accumulated into, (vocab, dim)
 * \param temp temporary accumulation workspace, (vocab, dim)
 * \param index row indices, (batch,)
 * \param src incoming gradients, (batch, dim)
 */
template<bool clip, typename IndexType, typename DType, typename AType>
inline void AddTakeGrad(Tensor<gpu, 2, DType> dst,
                        Tensor<gpu, 2, AType> temp,
                        const Tensor<gpu, 1, IndexType>& index,
                        const Tensor<gpu, 2, DType> &src) {
  cuda::AddTakeGrad<clip, IndexType, DType>(dst, temp, index, src);
}
/*!
 * \brief AddTakeGrad variant optimized for large batches, using a
 *        pre-sorted copy of the indices to reduce atomics contention
 * \param dst gradient buffer being accumulated into, (vocab, dim)
 * \param sorted indices sorted ascending, (batch,)
 * \param index original positions matching the sorted order, (batch,)
 * \param src incoming gradients, (batch, dim)
 */
template<typename IndexType, typename DType>
inline void AddTakeGradLargeBatch(Tensor<gpu, 2, DType> dst,
                                  const Tensor<gpu, 1, IndexType>& sorted,
                                  const Tensor<gpu, 1, IndexType>& index,
                                  const Tensor<gpu, 2, DType> &src) {
  cuda::AddTakeGradLargeBatch(dst, sorted, index, src);
}
/*!
 * \brief sort the key tensor and reorder values to match, on the GPU
 * \param keys 1-D keys, sorted in place
 * \param values 1-D values, permuted in place alongside keys
 * \param is_ascend sort direction: true for ascending, false for descending
 */
template<typename KDType, typename VDType>
inline void SortByKey(Tensor<gpu, 1, KDType> keys, Tensor<gpu, 1, VDType> values,
                      bool is_ascend) {
  cuda::SortByKey(keys, values, is_ascend);
}
/*!
 * \brief overwrite rows of dst selected by index with rows of src
 * \param dst destination matrix, (n_rows, dim)
 * \param index destination row index for each source row, (batch,)
 * \param src replacement rows, (batch, dim)
 */
template<typename IndexType, typename DType>
inline void IndexFill(Tensor<gpu, 2, DType> dst,
                      const Tensor<gpu, 1, IndexType>& index,
                      const Tensor<gpu, 2, DType> &src) {
  cuda::IndexFill(dst, index, src);
}
271 #endif // MSHADOW_TENSOR_GPU_INL_H_