Go to the documentation of this file.
24 #ifndef MSHADOW_PACKET_INL_H_
25 #define MSHADOW_PACKET_INL_H_
27 #if defined(__APPLE__) || defined(__FreeBSD__)
47 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kSSE2
49 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kPlain
58 template<
typename DType, PacketArch Arch = MSHADOW_DEFAULT_PACKET>
61 template<PacketArch Arch>
83 size_t pitch = ((lspace +
mask) >> bits) << bits;
86 void *res = _aligned_malloc(pitch * num_line, 1 << bits);
89 int ret = posix_memalign(&res, 1 << bits, pitch * num_line);
90 CHECK_EQ(ret, 0) <<
"AlignedMallocPitch failed";
93 LOG(FATAL) <<
"AlignedMallocPitch failed";
96 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
99 #pragma GCC diagnostic pop
115 template<PacketArch Arch>
118 return !(pitch & ((1 << bits) - 1));
122 template<PacketArch Arch>
124 return CheckAlign<Arch>(
reinterpret_cast<size_t>(ptr));
132 template<
typename DType, PacketArch Arch>
136 const index_t fsize =
sizeof(DType);
137 return (((size * fsize +
mask) >> bits) << bits) / fsize;
145 template<
typename DType, PacketArch Arch>
148 const index_t fsize =
sizeof(DType);
149 return (((size * fsize) >> bits) << bits) / fsize;
158 template<
typename OP,
typename DType, PacketArch Arch>
163 template<
typename DType, PacketArch Arch>
171 template<
typename DType, PacketArch Arch>
179 template<
typename DType, PacketArch Arch>
187 template<
typename DType, PacketArch Arch>
196 template<
typename DType, PacketArch Arch>
206 template<
typename SV,
typename TFloat, PacketArch Arch>
214 template<
typename TFloat, PacketArch Arch>
215 struct Saver<sv::saveto, TFloat, Arch> {
224 #if MSHADOW_USE_SSE && !defined(__CUDACC__)
234 template<
typename ExpType,
typename DType, PacketArch Arch>
245 template <
typename Device,
int dim,
typename DType, PacketArch Arch>
249 :dptr_(t.dptr_), stride_(t.stride_) {}
254 return dptr_[y * stride_ + x];
262 template<
typename DType, PacketArch Arch>
277 template<
typename OP,
typename TA,
typename TB,
int etype,
typename DType, PacketArch Arch>
281 : lhs_(lhs), rhs_(rhs) {}
286 return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
294 template<
typename OP,
typename TA,
int etype,
typename DType, PacketArch Arch>
302 return OP::Map(src_.Eval(y, x));
309 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
310 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
313 template<PacketArch Arch,
typename DType>
317 template<PacketArch Arch,
typename T,
typename DType>
321 template<PacketArch Arch,
typename T,
int dim,
typename DType>
322 inline PacketPlan<T, DType, Arch>
326 template<PacketArch Arch,
typename OP,
typename TA,
typename DType,
int etype>
327 inline PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>
331 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
332 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
335 DType, Arch>(MakePacketPlan<Arch>(e.
lhs_), MakePacketPlan<Arch>(e.
rhs_));
345 template<
typename E, PacketArch Arch>
349 template<PacketArch Arch>
353 template<PacketArch Arch>
357 template<
typename DType, PacketArch Arch>
361 template<
int dim,
typename DType, PacketArch Arch>
365 template<
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
370 template<
typename OP,
typename TA,
typename TB,
typename DType,
int etype, PacketArch Arch>
378 template<
int dim,
typename E, PacketArch Arch>
380 inline static bool Check(
const E &exp) {
384 template<
int dim,
typename DType, PacketArch Arch>
390 template<
int dim,
typename DType, PacketArch Arch>
393 return packet::CheckAlign<Arch>(t.
dptr_) &&
394 packet::CheckAlign<Arch>(t.
stride_ *
sizeof(DType));
397 template<
int dim,
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
403 template<
int dim,
typename OP,
typename TA,
typename TB,
415 template<
typename SV,
typename E,
int dim,
typename DType, PacketArch Arch>
419 const index_t xlen = packet::LowerAlign<DType, Arch>(dst.
size(1));
422 #pragma omp parallel for
425 for (
index_t x = 0; x < xlen; x += packetSize) {
429 SV::Save(dst[y][x], plan.
Eval(y, x));
435 #endif // MSHADOW_PACKET_INL_H_
definitions of abstract expressions and expression templates
MSHADOW_CINLINE packet::Packet< DType > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:298
index_t openmp_index_t
openmp index for linux
Definition: base.h:336
@ kSSE2
Definition: packet-inl.h:43
const Container & self(void) const
Definition: expression.h:82
ScalarExp< DType > scalar(DType s)
create a scalar expression
Definition: expression.h:103
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:250
PacketArch
Definition: packet-inl.h:41
generic Packet operator
Definition: packet-inl.h:159
static check packet enable
Definition: packet-inl.h:346
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &src)
Definition: packet-inl.h:199
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:190
Generic packet type.
Definition: packet-inl.h:59
static bool Check(const BinaryMapExp< OP, TA, TB, DType, etype > &t)
Definition: packet-inl.h:406
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
static bool Check(const UnaryMapExp< OP, TA, DType, etype > &t)
Definition: packet-inl.h:399
general tensor
Definition: tensor.h:525
index_t LowerAlign(index_t size)
get lower bound of aligned index of size
Definition: packet-inl.h:146
void * AlignedMallocPitch(size_t *out_pitch, size_t lspace, size_t num_line)
analogous to cudaMallocPitch: allocates an aligned space with num_line * lspace cells
Definition: packet-inl.h:77
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:182
static const bool kEnabled
Definition: packet-inl.h:160
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:301
@ kPlain
Definition: packet-inl.h:42
index_t UpperAlign(index_t size)
get upper bound of aligned index of size
Definition: packet-inl.h:133
support of sse2 packet optimization of some operations
support of plain packet that use the plain datatype.
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:285
packet::PacketArch PacketArch
Definition: packet-inl.h:231
binary map expression lhs [op] rhs
Definition: expression.h:334
device name CPU
Definition: tensor.h:39
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:282
header file of tensor data structure and functions. This lib requires explicit memory allocation and d...
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
evaluate the expression at index [y][x], x will be aligned to Packet<DType, Arch>::Size()
MaskExp< IndexExp, SrcExp, DType > mask(const Exp< IndexExp, DType, e1 > &index, const Exp< SrcExp, DType, e2 > &src)
Definition: mask.h:57
const TA & src_
source expression
Definition: expression.h:407
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:231
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:174
Definition: packet-inl.h:379
PacketPlan(const PacketPlan< TA, DType, Arch > &src)
Definition: packet-inl.h:297
DType scalar_
scalar value
Definition: expression.h:97
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:266
PacketPlan(DType scalar)
Definition: packet-inl.h:265
Definition: packet-inl.h:62
Definition: packet-inl.h:235
const TB & rhs_
right operand
Definition: expression.h:339
int32_t index_t
type that will be used for index
Definition: base.h:328
const SubType & real_self(void) const
true self of subtype
Definition: expr_engine-inl.h:49
static const bool kPass
Definition: packet-inl.h:347
PacketPlan(const Tensor< Device, dim, DType > &t)
Definition: packet-inl.h:248
base class of all rvalues
Definition: expression.h:148
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:253
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:216
overloaded + operator between half_t and bf16_t
Definition: base.h:319
PacketPlan< BinaryMapExp< OP, TA, TB, DType, etype >, DType, Arch > MakePacketPlan(const BinaryMapExp< OP, TA, TB, DType, etype > &e)
Definition: packet-inl.h:333
static bool Check(const E &exp)
Definition: packet-inl.h:380
a general class that allows extension that makes tensors of some shape
Definition: expr_engine-inl.h:43
static const index_t value
Definition: packet-inl.h:63
static bool Check(const Tensor< cpu, dim, DType > &t)
Definition: packet-inl.h:392
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:269
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:624
const TA & lhs_
left operand
Definition: expression.h:337
void MapPacketPlan(Tensor< cpu, dim, DType > _dst, const expr::PacketPlan< E, DType, Arch > &plan)
use PacketPlan to compute result
Definition: packet-inl.h:416
DType * dptr_
pointer to the data
Definition: tensor.h:539
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:166
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:208
Definition: packet-inl.h:207
MSHADOW_XINLINE index_t size(int idx) const
return size of i-th dimension, start counting from highest dimension
Definition: tensor.h:610
static bool Check(const ScalarExp< DType > &exp)
Definition: packet-inl.h:386
bool CheckAlign(size_t pitch)
check if a pointer is aligned
Definition: packet-inl.h:116
unary map expression op(src)
Definition: expression.h:404
definitions of base types, operators, macros functions
scalar expression
Definition: expression.h:95
PacketPlan(const PacketPlan< TA, DType, Arch > &lhs, const PacketPlan< TB, DType, Arch > &rhs)
Definition: packet-inl.h:280
index_t stride_
stores the stride information in the x dimension; this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:546
void AlignedFree(void *ptr)
free aligned space
Definition: packet-inl.h:106