mshadow Namespace Reference
Namespaces | |
bfloat | |
expr | |
namespace for abstract expressions and expression templates; has no dependency on tensor.h. These data structures perform no computation themselves; they are only used to define operations and represent expressions symbolically | |
half | |
isinf_typed | |
determines if the given floating point number is a positive or negative infinity | |
isnan_typed | |
determines if the given floating point number is not a number | |
op | |
namespace for operators | |
packet | |
namespace of packet math | |
red | |
namespace for potential reducer operations | |
sv | |
namespace for savers | |
utils | |
Classes | |
struct | cpu |
device name CPU More... | |
struct | DataType |
struct | DataType< bfloat::bf16_t > |
struct | DataType< bool > |
struct | DataType< double > |
struct | DataType< float > |
struct | DataType< half::half2_t > |
struct | DataType< half::half_t > |
struct | DataType< int32_t > |
struct | DataType< int64_t > |
struct | DataType< int8_t > |
struct | DataType< uint8_t > |
struct | gpu |
device name GPU More... | |
struct | LayoutType |
struct | LayoutType< kNCDHW > |
struct | LayoutType< kNCHW > |
struct | LayoutType< kNDHWC > |
struct | LayoutType< kNHWC > |
struct | MapExpCPUEngine |
struct | MapExpCPUEngine< true, SV, Tensor< cpu, dim, DType >, dim, DType, E, etype > |
class | Random |
random number generator More... | |
class | Random< cpu, DType > |
CPU random number generator. More... | |
class | Random< gpu, DType > |
GPU random number generator. More... | |
struct | Shape |
shape of a tensor More... | |
struct | Stream |
computation stream structure, used for asynchronous computations More... | |
struct | Stream< gpu > |
struct | Tensor |
general tensor More... | |
struct | Tensor< Device, 1, DType > |
class | TensorContainer |
tensor container that handles memory allocation and resizing like an STL container; use it to avoid explicit FreeSpace calls in classes. Do not abuse it: efficiency comes from pre-allocation and avoiding re-allocation More... | |
struct | TRValue |
Tensor RValue, this is the super type of all kinds of possible tensors. More... | |
Typedefs | |
typedef int32_t | index_t |
type that will be used for index More... | |
typedef index_t | openmp_index_t |
openmp index for linux More... | |
typedef float | default_real_t |
floating point type that will be used by default in mshadow More... | |
Enumerations | |
enum | TypeFlag { kFloat32 = 0, kFloat64 = 1, kFloat16 = 2, kUint8 = 3, kInt32 = 4, kInt8 = 5, kInt64 = 6, kBool = 7, kInt16 = 8, kUint16 = 9, kUint32 = 10, kUint64 = 11, kBfloat16 = 12 } |
data type flag More... | |
enum | LayoutFlag { kNCHW = 0, kNHWC, kCHWN, kNCW = 1 << 3, kNWC, kCWN, kNCDHW = 1 << 5, kNDHWC, kCDHWN } |
Functions | |
size_t | mshadow_sizeof (int type) |
get data type size from type enum More... | |
std::string | dtype_string (const int dtype) |
template<typename Device , typename DType > | |
void | GetBatchedView (DType **dst, DType *src, int num, int stride, Stream< Device > *stream) |
CPU/GPU: Get a batched view of the src array. dst[i] = src + i * stride. More... | |
template<typename DType > | |
void | GetBatchedView (DType **dst, DType *src, int num, int stride, Stream< cpu > *stream) |
template<int dim, typename DType , typename TStream > | |
void | SaveBinary (TStream &fo, const Tensor< cpu, dim, DType > &src) |
CPU/GPU: save a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated. More... | |
template<int dim, typename DType , typename TStream > | |
void | SaveBinary (TStream &fo, const Tensor< gpu, dim, DType > &src) |
CPU/GPU: save a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated. More... | |
template<int dim, typename DType , typename TStream > | |
void | LoadBinary (TStream &fi, Tensor< cpu, dim, DType > *dst, bool pre_alloc) |
CPU/GPU: load a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated. If pre_alloc is true, space in dst is preallocated and must match the shape of the tensor being loaded; if pre_alloc is false, dst initially has no space allocated and LoadBinary allocates it. More... | |
template<int dim, typename DType , typename TStream > | |
void | LoadBinary (TStream &fi, Tensor< gpu, dim, DType > *dst, bool pre_alloc) |
CPU/GPU: load a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated. If pre_alloc is true, space in dst is preallocated and must match the shape of the tensor being loaded; if pre_alloc is false, dst initially has no space allocated and LoadBinary allocates it. More... | |
template<> | |
void | DeleteStream< gpu > (Stream< gpu > *stream) |
template<> | |
Stream< gpu > * | NewStream< gpu > (bool create_blas_handle, bool create_dnn_handle, int dev_id) |
template<int ndim> | |
std::ostream & | operator<< (std::ostream &os, const Shape< ndim > &shape) |
allow string printing of the shape More... | |
MSHADOW_XINLINE Shape< 1 > | Shape1 (index_t s0) |
construct a one dimension shape, stride will equal s0 More... | |
MSHADOW_XINLINE Shape< 2 > | Shape2 (index_t s0, index_t s1) |
construct a two dimension shape, stride will equal s0 More... | |
MSHADOW_XINLINE Shape< 3 > | Shape3 (index_t s0, index_t s1, index_t s2) |
construct a three dimension shape, stride will equal s0 More... | |
MSHADOW_XINLINE Shape< 4 > | Shape4 (index_t s0, index_t s1, index_t s2, index_t s3) |
construct a four dimension shape, stride will equal s0 More... | |
MSHADOW_XINLINE Shape< 5 > | Shape5 (index_t s0, index_t s1, index_t s2, index_t s3, index_t s4) |
construct a five dimension shape, stride will equal s0 More... | |
Shape< 3 > | ConvertLayout (const Shape< 3 > &src, int src_layout, int dst_layout) |
Convert shape in src_layout to shape in dst_layout. More... | |
Shape< 4 > | ConvertLayout (const Shape< 4 > &src, int src_layout, int dst_layout) |
Convert shape in src_layout to shape in dst_layout. More... | |
Shape< 5 > | ConvertLayout (const Shape< 5 > &src, int src_layout, int dst_layout) |
Convert shape in src_layout to shape in dst_layout. More... | |
template<typename Device > | |
void | InitTensorEngine (int device_id=0) |
initialize the tensor engine; calls the initialization functions of dependent libraries. This function should be called before any GPU tensor operation; for tensors used only on the CPU this call is not needed More... | |
template<typename Device > | |
void | ShutdownTensorEngine (void) |
Shutdown the tensor engine on the current device; this function should be called after all GPU tensor operations. For tensors used only on the CPU this call is not needed. More... | |
template<typename Device > | |
void | SetDevice (int devid) |
set the device of current thread to work on More... | |
template<typename Device > | |
Stream< Device > * | NewStream (bool create_blas_handle, bool create_dnn_handle, int dev_id=-1) |
create a new stream from system More... | |
template<typename Device > | |
Stream< Device > * | NewStream (int dev_id) |
default behavior: create cublas handle More... | |
template<typename Device > | |
void | DeleteStream (Stream< Device > *stream) |
delete the computing stream More... | |
template<int dim, typename DType > | |
void | AllocSpace (Tensor< cpu, dim, DType > *obj, bool pad=MSHADOW_ALLOC_PAD) |
CPU/GPU: allocate space for a tensor according to the shape in obj; this function is responsible for setting stride_ in obj.shape. More... | |
template<int dim, typename DType > | |
void | AllocSpace (Tensor< gpu, dim, DType > *obj, bool pad=MSHADOW_ALLOC_PAD) |
CPU/GPU: allocate space for a tensor according to the shape in obj; this function is responsible for setting stride_ in obj.shape. More... | |
template<int dim, typename DType > | |
void | FreeSpace (Tensor< cpu, dim, DType > *obj) |
CPU/GPU: free the space of tensor, will set obj.dptr to NULL. More... | |
template<int dim, typename DType > | |
void | FreeSpace (Tensor< gpu, dim, DType > *obj) |
CPU/GPU: free the space of tensor, will set obj.dptr to NULL. More... | |
template<typename Device , typename DType , int dim> | |
Tensor< Device, dim, DType > | NewTensor (const Shape< dim > &shape, DType initv, bool pad=MSHADOW_ALLOC_PAD, Stream< Device > *stream=NULL) |
CPU/GPU: shortcut to allocate and initialize a Tensor. More... | |
template<int dim, typename DType > | |
void | Copy (Tensor< cpu, dim, DType > dst, const Tensor< cpu, dim, DType > &src, Stream< cpu > *stream=NULL) |
copy data from one tensor to another, with same shape More... | |
template<int dim, typename DType > | |
void | Copy (Tensor< cpu, dim, DType > dst, const Tensor< gpu, dim, DType > &src, Stream< gpu > *stream=NULL) |
copy data from one tensor to another, with same shape More... | |
template<int dim, typename DType > | |
void | Copy (Tensor< gpu, dim, DType > dst, const Tensor< cpu, dim, DType > &src, Stream< gpu > *stream=NULL) |
copy data from one tensor to another, with same shape More... | |
template<int dim, typename DType > | |
void | Copy (Tensor< gpu, dim, DType > dst, const Tensor< gpu, dim, DType > &src, Stream< gpu > *stream=NULL) |
copy data from one tensor to another, with same shape More... | |
template<typename DType > | |
void | Softmax (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &energy) |
CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j])) More... | |
template<typename DType > | |
void | Softmax (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 2, DType > &energy) |
CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j])) More... | |
template<typename DType > | |
void | SoftmaxGrad (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label) |
CPU/GPU: softmax gradient. More... | |
template<typename DType > | |
void | SoftmaxGrad (const Tensor< gpu, 2, DType > &dst, const Tensor< gpu, 2, DType > &src, const Tensor< gpu, 1, DType > &label) |
CPU/GPU: softmax gradient. More... | |
template<bool clip = true, typename IndexType , typename DType > | |
void | AddTakeGrad (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src) |
CPU/GPU: gradient accumulation for an embedding matrix: dst[index[i]] += src[i]. Called when the feature dimension of src is much larger than the batch size. More... | |
template<bool clip = true, typename IndexType , typename DType > | |
void | AddTakeGrad (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 1, IndexType > &index, const Tensor< gpu, 2, DType > &src) |
CPU/GPU: gradient accumulation for an embedding matrix: dst[index[i]] += src[i]. Called when the feature dimension of src is much larger than the batch size. More... | |
template<typename IndexType , typename DType > | |
void | AddTakeGradLargeBatch (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &sorted, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src) |
CPU/GPU: gradient accumulation for an embedding matrix: dst[sorted[i]] += src[index[i]]. Called when the batch size of src is larger than the feature dimension. More... | |
template<typename IndexType , typename DType > | |
void | AddTakeGradLargeBatch (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 1, IndexType > &sorted, const Tensor< gpu, 1, IndexType > &index, const Tensor< gpu, 2, DType > &src) |
CPU/GPU: gradient accumulation for an embedding matrix: dst[sorted[i]] += src[index[i]]. Called when the batch size of src is larger than the feature dimension. More... | |
template<typename IndexType , typename DType > | |
void | IndexFill (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src) |
CPU/GPU: fill specific rows of the destination matrix with values from the source matrix: dst[index[i]] = src[i]. The inner implementation may use atomicAdd, so the result may not be deterministic. More... | |
template<typename IndexType , typename DType > | |
void | IndexFill (Tensor< gpu, 2, DType > dst, const Tensor< gpu, 1, IndexType > &index, const Tensor< gpu, 2, DType > &src) |
CPU/GPU: fill specific rows of the destination matrix with values from the source matrix: dst[index[i]] = src[i]. The inner implementation may use atomicAdd, so the result may not be deterministic. More... | |
template<typename KDType , typename VDType > | |
void | SortByKey (Tensor< cpu, 1, KDType > keys, Tensor< cpu, 1, VDType > values, bool is_ascend=true) |
CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!) More... | |
template<typename KDType , typename VDType > | |
void | SortByKey (Tensor< gpu, 1, KDType > keys, Tensor< gpu, 1, VDType > values, bool is_ascend=true) |
CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!) More... | |
template<typename Device , typename VDType , typename SDType > | |
void | VectorizedSort (Tensor< Device, 1, VDType > values, Tensor< Device, 1, SDType > segments) |
CPU/GPU: sort the keys within each segment (a stable sort is performed). Segments are defined by an ascending ordered vector such as [0, 0, 0, 1, 1, 2, 3, 3, 3, ...]; the keys labeled 0, 1, 2, 3, ... are sorted separately. Currently only ascending order is supported. More... | |
template<typename Saver , typename R , int dim, typename DType , typename E , int etype> | |
void | MapExp (TRValue< R, cpu, dim, DType > *dst, const expr::Exp< E, DType, etype > &exp) |
CPU/GPU: map an expression to a tensor; this function calls MapPlan. More... | |
template<typename Saver , typename R , int dim, typename DType , typename E , int etype> | |
void | MapExp (TRValue< R, gpu, dim, DType > *dst, const expr::Exp< E, DType, etype > &exp) |
CPU/GPU: map an expression to a tensor; this function calls MapPlan. More... | |
template<typename Saver , typename Reducer , typename R , typename DType , typename E , int etype> | |
void | MapReduceKeepLowest (TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
CPU/GPU: map an expression and reduce to a 1D Tensor along the lowest dimension (dimension 0) More... | |
template<typename Saver , typename Reducer , typename R , typename DType , typename E , int etype> | |
void | MapReduceKeepLowest (TRValue< R, gpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
CPU/GPU: map an expression and reduce to a 1D Tensor along the lowest dimension (dimension 0) More... | |
template<typename Saver , typename Reducer , int dimkeep, typename R , typename DType , typename E , int etype> | |
void | MapReduceKeepHighDim (TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
CPU/GPU: map an expression and reduce to a 1D Tensor along the third dimension (dimension 2) More... | |
template<typename Saver , typename Reducer , int dimkeep, typename R , typename DType , typename E , int etype> | |
void | MapReduceKeepHighDim (TRValue< R, gpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1) |
CPU/GPU: map an expression and reduce to a 1D Tensor along the third dimension (dimension 2) More... | |
template<typename Device , typename DType > | |
void | VectorDot (Tensor< Device, 1, DType > dst, const Tensor< Device, 1, DType > &lhs, const Tensor< Device, 1, DType > &rhs) |
CPU/GPU: 1 dimension vector dot. More... | |
template<bool transpose_left, bool transpose_right, typename Device , typename DType > | |
void | BatchGEMM (Tensor< Device, 3, DType > dst, const Tensor< Device, 3, DType > &lhs, const Tensor< Device, 3, DType > &rhs, DType alpha, DType beta, Tensor< Device, 1, DType * > workspace) |
CPU/GPU: dst = alpha * op(lhs) * op(rhs) + beta * dst. More... | |
template<> | |
void | InitTensorEngine< cpu > (int dev_id) |
template<> | |
void | ShutdownTensorEngine< cpu > (void) |
template<> | |
void | SetDevice< cpu > (int devid) |
template<> | |
Stream< cpu > * | NewStream< cpu > (bool create_blas_handle, bool create_dnn_handle, int dev_id) |
template<> | |
void | DeleteStream< cpu > (Stream< cpu > *stream) |
template<typename xpu > | |
void * | AllocHost_ (size_t size) |
template<typename xpu > | |
void | FreeHost_ (void *dptr) |
template<> | |
void * | AllocHost_< cpu > (size_t size) |
template<> | |
void | FreeHost_< cpu > (void *dptr) |
template<typename xpu , int dim, typename DType > | |
void | AllocHost (Tensor< cpu, dim, DType > *obj) |
template<typename xpu , int dim, typename DType > | |
void | FreeHost (Tensor< cpu, dim, DType > *obj) |
template<typename Saver , typename R , int dim, typename DType , typename E > | |
void | MapPlan (TRValue< R, cpu, dim, DType > *dst, const expr::Plan< E, DType > &plan) |
template<typename DType > | |
void | Softmax (Tensor< cpu, 1, DType > dst, const Tensor< cpu, 1, DType > &energy) |
template<typename DType > | |
void | SmoothSoftmaxGrad (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label, const float alpha) |
template<typename DType > | |
void | SoftmaxGrad (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label, const DType &ignore_label) |
template<typename DType > | |
void | SmoothSoftmaxGrad (Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label, const DType &ignore_label, const float alpha) |
template<typename DType > | |
void | SoftmaxGrad (Tensor< cpu, 3, DType > dst, const Tensor< cpu, 3, DType > &src, const Tensor< cpu, 2, DType > &label) |
template<typename DType > | |
void | SmoothSoftmaxGrad (Tensor< cpu, 3, DType > dst, const Tensor< cpu, 3, DType > &src, const Tensor< cpu, 2, DType > &label, const float alpha) |
template<typename DType > | |
void | SoftmaxGrad (Tensor< cpu, 3, DType > dst, const Tensor< cpu, 3, DType > &src, const Tensor< cpu, 2, DType > &label, const DType &ignore_label) |
template<typename DType > | |
void | SmoothSoftmaxGrad (Tensor< cpu, 3, DType > dst, const Tensor< cpu, 3, DType > &src, const Tensor< cpu, 2, DType > &label, const DType &ignore_label, const float alpha) |
template<typename DType > | |
void | Softmax (Tensor< cpu, 3, DType > dst, const Tensor< cpu, 3, DType > &energy) |
template<> | |
void | InitTensorEngine< gpu > (int dev_id) |
template<> | |
void | ShutdownTensorEngine< gpu > (void) |
template<> | |
void | SetDevice< gpu > (int devid) |
template<typename A , typename B , int dim, typename DType > | |
void | Copy (Tensor< A, dim, DType > _dst, Tensor< B, dim, DType > _src, cudaMemcpyKind kind, Stream< gpu > *stream) |
Variables | |
const unsigned | kRandBufferSize = 1000000 |
buffer size for each random number generator More... | |
const float | kPi = 3.1415926f |
pi More... | |
const int | default_type_flag = DataType<default_real_t>::kFlag |
type enum value for default real type More... | |
const int | default_layout = kNCHW |
default layout for 4d tensor More... | |
const int | default_layout_5d = kNCDHW |
default layout for 5d tensor More... | |
namespace for mshadow
overloaded +, -, *, /, >, <, >= and <= operators between half_t and bf16_t
typedef float mshadow::default_real_t |
floating point type that will be used by default in mshadow
typedef int32_t mshadow::index_t |
type that will be used for index
typedef index_t mshadow::openmp_index_t |
openmp index for linux
enum mshadow::LayoutFlag |
enum mshadow::TypeFlag |
CPU/GPU: gradient accumulation for an embedding matrix: dst[index[i]] += src[i]. Called when the feature dimension of src is much larger than the batch size.
dst | destination |
index | index to take |
src | source output |
CPU/GPU: gradient accumulation for an embedding matrix: dst[index[i]] += src[i]. Called when the feature dimension of src is much larger than the batch size.
dst | destination |
index | index to take |
src | source output |
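The accumulation rule above can be illustrated with a standalone sketch on flat row-major storage (a hypothetical helper, not mshadow's implementation; mshadow templates this over Tensor types, and the clip flag mirrors its clip template parameter):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of dst[index[i]] += src[i] for a row-major (rows x cols) matrix.
// With clip = true, out-of-range row indices are clamped into [0, rows-1].
inline void add_take_grad(std::vector<double>* dst, std::size_t rows,
                          std::size_t cols, const std::vector<int>& index,
                          const std::vector<double>& src, bool clip = true) {
  for (std::size_t i = 0; i < index.size(); ++i) {
    int r = index[i];
    if (clip) {
      if (r < 0) r = 0;
      if (r >= static_cast<int>(rows)) r = static_cast<int>(rows) - 1;
    }
    for (std::size_t j = 0; j < cols; ++j)
      (*dst)[static_cast<std::size_t>(r) * cols + j] += src[i * cols + j];
  }
}
```

Note the accumulation (`+=`): repeated indices add up, which is what a gradient of an embedding lookup requires.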
CPU/GPU: gradient accumulation for an embedding matrix: dst[sorted[i]] += src[index[i]]. Called when the batch size of src is larger than the feature dimension.
dst | destination |
sorted | the sorted indices |
index | original index of the sorted indices |
src | source output |
CPU/GPU: gradient accumulation for an embedding matrix: dst[sorted[i]] += src[index[i]]. Called when the batch size of src is larger than the feature dimension.
dst | destination |
sorted | the sorted indices |
index | original index of the sorted indices |
src | source output |
CPU/GPU: allocate space for a tensor according to the shape in obj; this function is responsible for setting stride_ in obj.shape.
obj | the tensor object, with shape specified |
pad | whether to pad dimension 0 so the last dimension is aligned; padding may improve the efficiency of matrix multiplication. If true, space is allocated with a stride_ that may not equal shape[0]; if false, contiguous space is allocated |
dim | specify the dim of tensor |
DType | type of element in tensor |
CPU/GPU: allocate space for a tensor according to the shape in obj; this function is responsible for setting stride_ in obj.shape.
obj | the tensor object, with shape specified |
pad | whether to pad dimension 0 so the last dimension is aligned; padding may improve the efficiency of matrix multiplication. If true, space is allocated with a stride_ that may not equal shape[0]; if false, contiguous space is allocated |
dim | specify the dim of tensor |
DType | type of element in tensor |
CPU/GPU: dst = alpha * op(lhs) * op(rhs) + beta * dst.
dst | 3-dimensional tensor holding the batched result |
lhs | left operand (batched matrices) |
rhs | right operand (batched matrices) |
alpha | multiplier of op(lhs) * op(rhs) |
beta | multiplier of dst |
workspace | Workspace for casting DType* to DType** (batched-view), must have size >= 3 * batch_size |
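The per-batch update can be sketched with a naive triple loop over row-major storage (an illustrative helper with hypothetical dimension parameters m, k, n; mshadow's BatchGEMM instead dispatches to BLAS/cuBLAS through the pointer workspace, and supports transposition of either operand, which this sketch omits):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of dst = alpha * lhs * rhs + beta * dst per batch, no transposes.
// lhs is [batch][m][k], rhs is [batch][k][n], dst is [batch][m][n],
// all stored row-major in flat vectors.
inline void batch_gemm(std::vector<double>* dst,
                       const std::vector<double>& lhs,
                       const std::vector<double>& rhs,
                       std::size_t batch, std::size_t m, std::size_t k,
                       std::size_t n, double alpha, double beta) {
  for (std::size_t b = 0; b < batch; ++b)
    for (std::size_t i = 0; i < m; ++i)
      for (std::size_t j = 0; j < n; ++j) {
        double acc = 0.0;
        for (std::size_t p = 0; p < k; ++p)
          acc += lhs[(b * m + i) * k + p] * rhs[(b * k + p) * n + j];
        double& d = (*dst)[(b * m + i) * n + j];
        d = alpha * acc + beta * d;
      }
}
```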
Convert shape in src_layout to shape in dst_layout.
src | original shape |
src_layout | layout of original shape |
dst_layout | target layout |
Convert shape in src_layout to shape in dst_layout.
src | original shape |
src_layout | layout of original shape |
dst_layout | target layout |
Convert shape in src_layout to shape in dst_layout.
src | original shape |
src_layout | layout of original shape |
dst_layout | target layout |
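To illustrate what such a conversion does to the extents, here is a standalone sketch (a hypothetical helper, not mshadow code) for the kNCHW → kNHWC case of a 4-d shape:

```cpp
#include <array>
#include <cassert>

// Reorder NCHW extents (N, C, H, W) into NHWC order (N, H, W, C) --
// the reshuffling ConvertLayout performs between kNCHW and kNHWC.
inline std::array<int, 4> nchw_to_nhwc(const std::array<int, 4>& s) {
  return {s[0], s[2], s[3], s[1]};
}
```

Only the extents are permuted; the data itself is not moved, which is why ConvertLayout operates on Shape objects rather than tensors.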
copy data from one tensor to another, with same shape
dst | target tensor |
src | source tensor |
stream | the stream; when specified, the copy can exhibit asynchronous behavior |
dim | specify the dim of tensor |
DType | type of element in tensor |
copy data from one tensor to another, with same shape
dst | target tensor |
src | source tensor |
stream | the stream; when specified, the copy can exhibit asynchronous behavior |
dim | specify the dim of tensor |
DType | type of element in tensor |
copy data from one tensor to another, with same shape
dst | target tensor |
src | source tensor |
stream | the stream; when specified, the copy can exhibit asynchronous behavior |
dim | specify the dim of tensor |
DType | type of element in tensor |
copy data from one tensor to another, with same shape
dst | target tensor |
src | source tensor |
stream | the stream; when specified, the copy can exhibit asynchronous behavior |
dim | specify the dim of tensor |
DType | type of element in tensor |
delete the computing stream
stream | the stream parameter to be deleted |
CPU/GPU: free the space of tensor, will set obj.dptr to NULL.
obj | the tensor object |
dim | specify the dim of tensor |
DType | type of element in tensor |
CPU/GPU: free the space of tensor, will set obj.dptr to NULL.
obj | the tensor object |
dim | specify the dim of tensor |
DType | type of element in tensor |
CPU/GPU: Get a batched view of the src array. dst[i] = src + i * stride.
dst | 2D pointer |
src | 1D pointer |
num | number of batches |
stride | size of each batch |
stream |
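The CPU path of this batched view is plain pointer arithmetic, sketched below (a hypothetical standalone helper; the real function is templated over DType and, on GPU, fills dst asynchronously on the given stream):

```cpp
#include <cassert>
#include <vector>

// Fill dst with pointers into a flat array so that dst[i] = src + i * stride,
// giving a "batched view" of one contiguous allocation.
inline void get_batched_view(float** dst, float* src, int num, int stride) {
  for (int i = 0; i < num; ++i) dst[i] = src + i * stride;
}
```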
CPU/GPU: fill specific rows of the destination matrix with values from the source matrix: dst[index[i]] = src[i]. The inner implementation may use atomicAdd, so the result may not be deterministic.
dst | destination |
index | the row indices to fill |
src | source output |
CPU/GPU: fill specific rows of the destination matrix with values from the source matrix: dst[index[i]] = src[i]. The inner implementation may use atomicAdd, so the result may not be deterministic.
dst | destination |
index | the row indices to fill |
src | source output |
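The assignment semantics (overwrite, in contrast to AddTakeGrad's accumulate) can be sketched on flat row-major storage (a hypothetical helper, not mshadow's implementation):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of dst[index[i]] = src[i]: row index[i] of a row-major
// (rows x cols) matrix dst is overwritten with row i of src.
inline void index_fill(std::vector<double>* dst, std::size_t cols,
                       const std::vector<int>& index,
                       const std::vector<double>& src) {
  for (std::size_t i = 0; i < index.size(); ++i)
    for (std::size_t j = 0; j < cols; ++j)
      (*dst)[static_cast<std::size_t>(index[i]) * cols + j] =
          src[i * cols + j];
}
```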
initialize the tensor engine; calls the initialization functions of dependent libraries. This function should be called before any GPU tensor operation; for tensors used only on the CPU this call is not needed
device_id | GPU device id to be chosen |
Device | the device type |
CPU/GPU: load a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated. If pre_alloc is true, space in dst is preallocated and must match the shape of the tensor being loaded; if pre_alloc is false, dst initially has no space allocated and LoadBinary allocates it.
fi | input binary stream |
dst | destination tensor |
pre_alloc | whether space is pre-allocated, if false, space allocation will happen |
dim | dimension of tensor |
DType | type of element in tensor |
TStream | type of stream, need to support Read, Write, one example is utils::IStream. |
CPU/GPU: load a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated. If pre_alloc is true, space in dst is preallocated and must match the shape of the tensor being loaded; if pre_alloc is false, dst initially has no space allocated and LoadBinary allocates it.
fi | input binary stream |
dst | destination tensor |
pre_alloc | whether space is pre-allocated, if false, space allocation will happen |
dim | dimension of tensor |
DType | type of element in tensor |
TStream | type of stream, need to support Read, Write, one example is utils::IStream. |
CPU/GPU: map an expression to a tensor; this function calls MapPlan.
Saver | specify storage method |
R | specifies the storage type of the tensor |
dim | dim of the tensor, during usage, there is no need to specify this parameter |
DType | the type of elements in the tensor |
E | specifies the expression type; no need to specify this parameter during usage |
etype | expression type |
dst | destination |
exp | expression |
CPU/GPU: map an expression to a tensor; this function calls MapPlan.
Saver | specify storage method |
R | specifies the storage type of the tensor |
dim | dim of the tensor, during usage, there is no need to specify this parameter |
DType | the type of elements in the tensor |
E | specifies the expression type; no need to specify this parameter during usage |
etype | expression type |
dst | destination |
exp | expression |
CPU/GPU: map an expression and reduce to a 1D Tensor along the third dimension (dimension 2)
Saver | specify storage method |
Reducer | specify a reducer method |
R | specifies the storage type of the tensor |
DType | the type of elements in the tensor |
dimkeep | the target dimension to keep; must be larger than 0. For 0, use MapReduceKeepLowest |
E | specifies the expression type; no need to specify this parameter during usage |
etype | expression type |
dst | destination |
exp | expression |
scale | scale the result before save |
CPU/GPU: map an expression and reduce to a 1D Tensor along the third dimension (dimension 2)
Saver | specify storage method |
Reducer | specify a reducer method |
R | specifies the storage type of the tensor |
DType | the type of elements in the tensor |
dimkeep | the target dimension to keep; must be larger than 0. For 0, use MapReduceKeepLowest |
E | specifies the expression type; no need to specify this parameter during usage |
etype | expression type |
dst | destination |
exp | expression |
scale | scale the result before save |
CPU/GPU: map an expression and reduce to a 1D Tensor along the lowest dimension (dimension 0)
Saver | specify storage method |
Reducer | specify a reducer method |
R | specifies the storage type of the tensor |
DType | the type of elements in the tensor |
E | specifies the expression type; no need to specify this parameter during usage |
etype | expression type |
dst | destination |
exp | expression |
scale | scale the result before save |
CPU/GPU: map an expression and reduce to a 1D Tensor along the lowest dimension (dimension 0)
Saver | specify storage method |
Reducer | specify a reducer method |
R | specifies the storage type of the tensor |
DType | the type of elements in the tensor |
E | specifies the expression type; no need to specify this parameter during usage |
etype | expression type |
dst | destination |
exp | expression |
scale | scale the result before save |
get data type size from type enum
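Conceptually this is a lookup from the TypeFlag values listed above to element sizes in bytes; a sketch covering a few flags (a hypothetical standalone function mirroring what mshadow_sizeof returns, not its actual source):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>

// Map a TypeFlag enum value (kFloat32 = 0, kFloat64 = 1, ...) to the
// size in bytes of one element of that type.
inline std::size_t type_size(int type_flag) {
  switch (type_flag) {
    case 0:  return sizeof(float);    // kFloat32
    case 1:  return sizeof(double);   // kFloat64
    case 2:  return 2;                // kFloat16 (half_t)
    case 3:  return sizeof(uint8_t);  // kUint8
    case 4:  return sizeof(int32_t);  // kInt32
    case 5:  return sizeof(int8_t);   // kInt8
    case 6:  return sizeof(int64_t);  // kInt64
    case 12: return 2;                // kBfloat16 (bf16_t)
    default: return 0;                // flag not handled in this sketch
  }
}
```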
create a new stream from system
create_blas_handle | whether create blas & cusolver handle in stream |
create_dnn_handle | whether create cudnn handle in stream |
dev_id | device id |
Device | the device type |
default behavior: create cublas handle
dev_id | device id |
CPU/GPU: shortcut to allocate and initialize a Tensor.
shape | shape of tensor |
initv | initialization value |
pad | padding option |
stream | stream of the tensor |
Device | device of tensor |
DType | type of element in tensor |
dim | dimension of tensor |
allow string printing of the shape
os | the output stream |
shape | the shape |
CPU/GPU: save a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated.
fo | output binary stream |
src | source tensor |
dim | dimension of tensor |
DType | type of element in tensor |
TStream | type of stream, need to support Read, Write, one example is utils::IStream. |
CPU/GPU: save a tensor in binary format; for the GPU version, a temporary Tensor<cpu,dim> storage will be allocated.
fo | output binary stream |
src | source tensor |
dim | dimension of tensor |
DType | type of element in tensor |
TStream | type of stream, need to support Read, Write, one example is utils::IStream. |
set the device of current thread to work on
devid | the device id |
Device | the device type |
MSHADOW_XINLINE Shape<1> mshadow::Shape1(index_t s0)
construct a one dimension shape, stride will equal s0
s0 | size of dimension 0 |
MSHADOW_XINLINE Shape<2> mshadow::Shape2(index_t s0, index_t s1)
construct a two dimension shape, stride will equal s0
s0 | size of dimension 0 |
s1 | size of dimension 1 |
MSHADOW_XINLINE Shape<3> mshadow::Shape3(index_t s0, index_t s1, index_t s2)
construct a three dimension shape, stride will equal s0
s0 | size of dimension 0 |
s1 | size of dimension 1 |
s2 | size of dimension 2 |
construct a four dimension shape, stride will equal s0
s0 | size of dimension 0 |
s1 | size of dimension 1 |
s2 | size of dimension 2 |
s3 | size of dimension 3 |
MSHADOW_XINLINE Shape<5> mshadow::Shape5(index_t s0, index_t s1, index_t s2, index_t s3, index_t s4)
construct a five dimension shape, stride will equal s0
s0 | size of dimension 0 |
s1 | size of dimension 1 |
s2 | size of dimension 2 |
s3 | size of dimension 3 |
s4 | size of dimension 4 |
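The stride mentioned in these constructors is the allocated width of the lowest dimension: it equals the last extent unless AllocSpace pads it for alignment. A minimal addressing sketch for the 2-d case (a hypothetical struct, not mshadow's Shape, whose field names are illustrative):

```cpp
#include <cassert>
#include <cstddef>

// A 2-d shape (shape0 x shape1) with a stride: element (i, j) lives at
// flat offset i * stride + j, and stride >= shape1 when padding is on.
struct Shape2Sketch {
  std::size_t shape0, shape1, stride;
  std::size_t offset(std::size_t i, std::size_t j) const {
    return i * stride + j;
  }
  // Number of allocated elements, including any padding per row.
  std::size_t msize() const { return shape0 * stride; }
};
```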
Shutdown the tensor engine on the current device; this function should be called after all GPU tensor operations. For tensors used only on the CPU this call is not needed.
Device | the device type |
CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j]))
dst | destination |
energy | input energy |
CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j]))
dst | destination |
energy | input energy |
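The row-wise normalization dst[i][j] = exp(energy[i][j]) / sum_j exp(energy[i][j]) can be sketched in standalone form (a hypothetical helper, not mshadow's templated kernel; the row-maximum subtraction is the usual numerical-stability trick and does not change the result):

```cpp
#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

// Row-wise softmax: each row of the result is non-negative and sums to 1.
inline std::vector<std::vector<double>> softmax(
    const std::vector<std::vector<double>>& energy) {
  std::vector<std::vector<double>> dst(energy);
  for (auto& row : dst) {
    double mx = row[0];
    for (double v : row) mx = std::max(mx, v);
    double sum = 0.0;
    for (double& v : row) { v = std::exp(v - mx); sum += v; }
    for (double& v : row) v /= sum;
  }
  return dst;
}
```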
CPU/GPU: softmax gradient.
dst | destination |
src | source output |
label | label info |
CPU/GPU: softmax gradient.
dst | destination |
src | source output |
label | label info |
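Assuming the common convention that src holds softmax outputs and label holds target class indices (an assumption; this page does not spell out the convention), the gradient of the cross-entropy loss with respect to the logits is src minus the one-hot encoding of the label, sketched below as a hypothetical standalone helper:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Sketch of the standard softmax cross-entropy gradient:
// dst[i][j] = src[i][j] - (j == label[i] ? 1 : 0).
inline std::vector<std::vector<double>> softmax_grad(
    const std::vector<std::vector<double>>& src,
    const std::vector<std::size_t>& label) {
  std::vector<std::vector<double>> dst(src);
  for (std::size_t i = 0; i < dst.size(); ++i)
    dst[i][label[i]] -= 1.0;
  return dst;
}
```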
CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!)
keys | the keys to sort |
values | the values sorted with respect to the keys |
is_ascend | whether to sort key in ascending order |
CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!)
keys | the keys to sort |
values | the values sorted with respect to the keys |
is_ascend | whether to sort key in ascending order |
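A stable key-value sort over separate arrays can be sketched with a sorted permutation (a hypothetical helper on concrete vector types; mshadow's version is templated over key and value types and dispatches to thrust on GPU, which this sketch does not attempt):

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <numeric>
#include <vector>

// Stable sort of (key, value) pairs stored in two separate vectors:
// sort an index permutation by key, then apply it to both arrays.
inline void sort_by_key(std::vector<int>* keys, std::vector<char>* values,
                        bool is_ascend = true) {
  std::vector<std::size_t> perm(keys->size());
  std::iota(perm.begin(), perm.end(), 0);
  std::stable_sort(perm.begin(), perm.end(),
                   [&](std::size_t a, std::size_t b) {
                     return is_ascend ? (*keys)[a] < (*keys)[b]
                                      : (*keys)[a] > (*keys)[b];
                   });
  std::vector<int> k(keys->size());
  std::vector<char> v(values->size());
  for (std::size_t i = 0; i < perm.size(); ++i) {
    k[i] = (*keys)[perm[i]];
    v[i] = (*values)[perm[i]];
  }
  keys->swap(k);
  values->swap(v);
}
```

Stability matters here: values attached to equal keys keep their original relative order.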
CPU/GPU: 1 dimension vector dot.
dst | Length 1 vector, used to hold the result. |
lhs | Left operand vector |
rhs | Right operand vector |
CPU/GPU: sort the keys within each segment (a stable sort is performed). Segments are defined by an ascending ordered vector such as [0, 0, 0, 1, 1, 2, 3, 3, 3, ...]; the keys labeled 0, 1, 2, 3, ... are sorted separately. Currently only ascending order is supported.
values | the data to sort |
segments | segment indicator |
const int mshadow::default_layout = kNCHW |
default layout for 4d tensor
const int mshadow::default_layout_5d = kNCDHW |
default layout for 5d tensor
const int mshadow::default_type_flag = DataType<default_real_t>::kFlag |
type enum value for default real type
const float mshadow::kPi = 3.1415926f |
pi
const unsigned mshadow::kRandBufferSize = 1000000 |
buffer size for each random number generator