|
template<int ndim> |
std::ostream & | mshadow::operator<< (std::ostream &os, const Shape< ndim > &shape) |
| allow string printing of the shape More...
|
|
MSHADOW_XINLINE Shape< 1 > | mshadow::Shape1 (index_t s0) |
| construct a one dimension shape, stride will equal s0 More...
|
|
MSHADOW_XINLINE Shape< 2 > | mshadow::Shape2 (index_t s0, index_t s1) |
| construct a two dimension shape, stride will equal s0 More...
|
|
MSHADOW_XINLINE Shape< 3 > | mshadow::Shape3 (index_t s0, index_t s1, index_t s2) |
| construct a three dimension shape, stride will equal s0 More...
|
|
MSHADOW_XINLINE Shape< 4 > | mshadow::Shape4 (index_t s0, index_t s1, index_t s2, index_t s3) |
| construct a four dimension shape, stride will equal s0 More...
|
|
MSHADOW_XINLINE Shape< 5 > | mshadow::Shape5 (index_t s0, index_t s1, index_t s2, index_t s3, index_t s4) |
| construct a five dimension shape, stride will equal s0 More...
|
|
Shape< 3 > | mshadow::ConvertLayout (const Shape< 3 > &src, int src_layout, int dst_layout) |
| Convert shape in src_layout to shape in dst_layout. More...
|
|
Shape< 4 > | mshadow::ConvertLayout (const Shape< 4 > &src, int src_layout, int dst_layout) |
| Convert shape in src_layout to shape in dst_layout. More...
|
|
Shape< 5 > | mshadow::ConvertLayout (const Shape< 5 > &src, int src_layout, int dst_layout) |
| Convert shape in src_layout to shape in dst_layout. More...
|
|
template<typename Device > |
void | mshadow::InitTensorEngine (int device_id=0) |
| initialize tensor engine, used to call initialization functions of dependent libs this function should be called before all GPU tensor operations, for using tensors in CPU, this call is actually not needed More...
|
|
template<typename Device > |
void | mshadow::ShutdownTensorEngine (void) |
| Shutdown tensor engine on current device this function should be called after all GPU tensor operations, for using tensors in CPU, this call is actually not needed. More...
|
|
template<typename Device > |
void | mshadow::SetDevice (int devid) |
| set the device of current thread to work on More...
|
|
template<typename Device > |
Stream< Device > * | mshadow::NewStream (bool create_blas_handle, bool create_dnn_handle, int dev_id=-1) |
| create a new stream from system More...
|
|
template<typename Device > |
Stream< Device > * | mshadow::NewStream (int dev_id) |
| default behavior: create cublas handle More...
|
|
template<typename Device > |
void | mshadow::DeleteStream (Stream< Device > *stream) |
| delete the computing stream More...
|
|
template<int dim, typename DType > |
void | mshadow::AllocSpace (Tensor< cpu, dim, DType > *obj, bool pad=MSHADOW_ALLOC_PAD) |
| CPU/GPU: allocate space for CTensor, according to the shape in the obj this function is responsible to set the stride_ in each obj.shape. More...
|
|
template<int dim, typename DType > |
void | mshadow::AllocSpace (Tensor< gpu, dim, DType > *obj, bool pad=MSHADOW_ALLOC_PAD) |
| CPU/GPU: allocate space for CTensor, according to the shape in the obj this function is responsible to set the stride_ in each obj.shape. More...
|
|
template<int dim, typename DType > |
void | mshadow::FreeSpace (Tensor< cpu, dim, DType > *obj) |
| CPU/GPU: free the space of tensor, will set obj.dptr to NULL. More...
|
|
template<int dim, typename DType > |
void | mshadow::FreeSpace (Tensor< gpu, dim, DType > *obj) |
| CPU/GPU: free the space of tensor, will set obj.dptr to NULL. More...
|
|
template<typename Device , typename DType , int dim> |
Tensor< Device, dim, DType > | mshadow::NewTensor (const Shape< dim > &shape, DType initv, bool pad=MSHADOW_ALLOC_PAD, Stream< Device > *stream=NULL) |
| CPU/GPU: short cut to allocate and initialize a Tensor. More...
|
|
template<int dim, typename DType > |
void | mshadow::Copy (Tensor< cpu, dim, DType > dst, const Tensor< cpu, dim, DType > &src, Stream< cpu > *stream=NULL) |
| copy data from one tensor to another, with same shape More...
|
|
template<int dim, typename DType > |
void | mshadow::Copy (Tensor< cpu, dim, DType > dst, const Tensor< gpu, dim, DType > &src, Stream< gpu > *stream=NULL) |
| copy data from one tensor to another, with same shape More...
|
|
template<int dim, typename DType > |
void | mshadow::Copy (Tensor< gpu, dim, DType > dst, const Tensor< cpu, dim, DType > &src, Stream< gpu > *stream=NULL) |
| copy data from one tensor to another, with same shape More...
|
|
template<int dim, typename DType > |
void | mshadow::Copy (Tensor< gpu, dim, DType > dst, const Tensor< gpu, dim, DType > &src, Stream< gpu > *stream=NULL) |
| copy data from one tensor to another, with same shape More...
|
|
template<typename DType > |
void | mshadow::Softmax (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &energy) |
| CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j])) More...
|
|
template<typename DType > |
void | mshadow::Softmax (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 2, DType > &energy) |
| CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j])) More...
|
|
template<typename DType > |
void | mshadow::SoftmaxGrad (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label) |
| CPU/GPU: softmax gradient. More...
|
|
template<typename DType > |
void | mshadow::SoftmaxGrad (const Tensor< gpu, 2, DType > &dst, const Tensor< gpu, 2, DType > &src, const Tensor< gpu, 1, DType > &label) |
| CPU/GPU: softmax gradient. More...
|
|
template<bool clip = true, typename IndexType , typename DType > |
void | mshadow::AddTakeGrad (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src) |
| CPU/GPU: Gradient accumulate of embedding matrix. dst[index[i]] += src[i] Called when the featuredim of src is much larger than the batchsize. More...
|
|
template<bool clip = true, typename IndexType , typename DType > |
void | mshadow::AddTakeGrad (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 1, IndexType > &index, const Tensor< gpu, 2, DType > &src) |
| CPU/GPU: Gradient accumulate of embedding matrix. dst[index[i]] += src[i] Called when the featuredim of src is much larger than the batchsize. More...
|
|
template<typename IndexType , typename DType > |
void | mshadow::AddTakeGradLargeBatch (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &sorted, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src) |
| CPU/GPU: Gradient accumulate of embedding matrix. dst[sorted[i]] += src[index[i]] Called when the batchsize of src is larger than the featuredim. More...
|
|
template<typename IndexType , typename DType > |
void | mshadow::AddTakeGradLargeBatch (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 1, IndexType > &sorted, const Tensor< gpu, 1, IndexType > &index, const Tensor< gpu, 2, DType > &src) |
| CPU/GPU: Gradient accumulate of embedding matrix. dst[sorted[i]] += src[index[i]] Called when the batchsize of src is larger than the featuredim. More...
|
|
template<typename IndexType , typename DType > |
void | mshadow::IndexFill (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src) |
| CPU/GPU: Fill specific rows of the destination matrix with values from the source matrix. dst[index[i]] = src[i] Will use atomicAdd in the inner implementation and the result may not be deterministic. More...
|
|
template<typename IndexType , typename DType > |
void | mshadow::IndexFill (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 1, IndexType > &index, const Tensor< gpu, 2, DType > &src) |
| CPU/GPU: Fill specific rows of the destination matrix with values from the source matrix. dst[index[i]] = src[i] Will use atomicAdd in the inner implementation and the result may not be deterministic. More...
|
|
template<typename KDType , typename VDType > |
void | mshadow::SortByKey (Tensor< cpu, 1, KDType > keys, Tensor< cpu, 1, VDType > values, bool is_ascend=true) |
| CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!) More...
|
|
template<typename KDType , typename VDType > |
void | mshadow::SortByKey (Tensor< gpu, 1, KDType > keys, Tensor< gpu, 1, VDType > values, bool is_ascend=true) |
| CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!) More...
|
|
template<typename Device , typename VDType , typename SDType > |
void | mshadow::VectorizedSort (Tensor< Device, 1, VDType > values, Tensor< Device, 1, SDType > segments) |
| CPU/GPU: Sort the keys within each segment. (Stable sort is performed!) Segments is defined as an ascending ordered vector like [0, 0, 0, 1, 1, 2, 3, 3, 3,...] We sort separately the keys labeled by 0 and 1, 2, 3, etc. Currently only supports sorting in ascending order !! More...
|
|
template<typename Saver , typename R , int dim, typename DType , typename E , int etype> |
void | mshadow::MapExp (TRValue< R, cpu, dim, DType > *dst, const expr::Exp< E, DType, etype > &exp) |
| CPU/GPU: map a expression to a tensor, this function calls MapPlan. More...
|
|
template<typename Saver , typename R , int dim, typename DType , typename E , int etype> |
void | mshadow::MapExp (TRValue< R, gpu, dim, DType > *dst, const expr::Exp< E, DType, etype > &exp) |
| CPU/GPU: map a expression to a tensor, this function calls MapPlan. More...
|
|
template<typename Saver , typename Reducer , typename R , typename DType , typename E , int etype> |
void | mshadow::MapReduceKeepLowest (TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
| CPU/GPU: map a expression, do reduction to 1D Tensor in lowest dimension (dimension 0) More...
|
|
template<typename Saver , typename Reducer , typename R , typename DType , typename E , int etype> |
void | mshadow::MapReduceKeepLowest (TRValue< R, gpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
| CPU/GPU: map a expression, do reduction to 1D Tensor in lowest dimension (dimension 0) More...
|
|
template<typename Saver , typename Reducer , int dimkeep, typename R , typename DType , typename E , int etype> |
void | mshadow::MapReduceKeepHighDim (TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
| CPU/GPU: map a expression, do reduction to 1D Tensor in third dimension (dimension 2) More...
|
|
template<typename Saver , typename Reducer , int dimkeep, typename R , typename DType , typename E , int etype> |
void | mshadow::MapReduceKeepHighDim (TRValue< R, gpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
| CPU/GPU: map a expression, do reduction to 1D Tensor in third dimension (dimension 2) More...
|
|
template<typename Device , typename DType > |
void | mshadow::VectorDot (Tensor< Device, 1, DType > dst, const Tensor< Device, 1, DType > &lhs, const Tensor< Device, 1, DType > &rhs) |
| CPU/GPU: 1 dimension vector dot. More...
|
|
template<bool transpose_left, bool transpose_right, typename Device , typename DType > |
void | mshadow::BatchGEMM (Tensor< Device, 3, DType > dst, const Tensor< Device, 3, DType > &lhs, const Tensor< Device, 3, DType > &rhs, DType alpha, DType beta, Tensor< Device, 1, DType * > workspace) |
| CPU/GPU: dst = alpha * op(lhs) op(rhs) + beta * dst. More...
|
|