25 #ifndef MSHADOW_PACKET_INL_H_ 26 #define MSHADOW_PACKET_INL_H_ 28 #if defined(__APPLE__) || defined(__FreeBSD__) 48 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kSSE2 50 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kPlain 59 template<
typename DType, PacketArch Arch = MSHADOW_DEFAULT_PACKET>
62 template<PacketArch Arch>
84 size_t pitch = ((lspace +
mask) >> bits) << bits;
87 void *res = _aligned_malloc(pitch * num_line, 1 << bits);
90 int ret = posix_memalign(&res, 1 << bits, pitch * num_line);
91 CHECK_EQ(ret, 0) <<
"AlignedMallocPitch failed";
94 LOG(FATAL) <<
"AlignedMallocPitch failed";
112 template<PacketArch Arch>
115 return !(pitch & ((1 << bits) - 1));
119 template<PacketArch Arch>
121 return CheckAlign<Arch>(
reinterpret_cast<size_t>(ptr));
129 template<
typename DType, PacketArch Arch>
133 const index_t fsize =
sizeof(DType);
134 return (((size * fsize + mask) >> bits) << bits) / fsize;
142 template<
typename DType, PacketArch Arch>
145 const index_t fsize =
sizeof(DType);
146 return (((size * fsize) >> bits) << bits) / fsize;
155 template<
typename OP,
typename DType, PacketArch Arch>
157 static const bool kEnabled =
false;
160 template<
typename DType, PacketArch Arch>
162 static const bool kEnabled =
true;
168 template<
typename DType, PacketArch Arch>
170 static const bool kEnabled =
true;
176 template<
typename DType, PacketArch Arch>
178 static const bool kEnabled =
true;
184 template<
typename DType, PacketArch Arch>
186 static const bool kEnabled =
true;
193 template<
typename DType, PacketArch Arch>
195 static const bool kEnabled =
true;
203 template<
typename SV,
typename TFloat, PacketArch Arch>
211 template<
typename TFloat, PacketArch Arch>
212 struct Saver<sv::saveto, TFloat, Arch> {
221 #if MSHADOW_USE_SSE && !defined(__CUDACC__) 231 template<
typename ExpType,
typename DType, PacketArch Arch>
242 template <
typename Device,
int dim,
typename DType, PacketArch Arch>
246 :dptr_(t.dptr_), stride_(t.stride_) {}
251 return dptr_[y * stride_ + x];
259 template<
typename DType, PacketArch Arch>
274 template<
typename OP,
typename TA,
typename TB,
int etype,
typename DType, PacketArch Arch>
278 : lhs_(lhs), rhs_(rhs) {}
283 return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
291 template<
typename OP,
typename TA,
int etype,
typename DType, PacketArch Arch>
299 return OP::Map(src_.Eval(y, x));
306 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
310 template<PacketArch Arch,
typename DType>
314 template<PacketArch Arch,
typename T,
typename DType>
318 template<PacketArch Arch,
typename T,
int dim,
typename DType>
323 template<PacketArch Arch,
typename OP,
typename TA,
typename DType,
int etype>
326 return PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>(MakePacketPlan<Arch>(e.
src_));
328 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
329 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
331 return PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>,
332 DType, Arch>(MakePacketPlan<Arch>(e.
lhs_), MakePacketPlan<Arch>(e.
rhs_));
342 template<
typename E, PacketArch Arch>
344 static const bool kPass =
false;
346 template<PacketArch Arch>
348 static const bool kPass =
true;
350 template<PacketArch Arch>
352 static const bool kPass =
true;
354 template<
typename DType, PacketArch Arch>
358 template<
int dim,
typename DType, PacketArch Arch>
362 template<
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
367 template<
typename OP,
typename TA,
typename TB,
typename DType,
int etype, PacketArch Arch>
375 template<
int dim,
typename E, PacketArch Arch>
377 inline static bool Check(
const E &exp) {
381 template<
int dim,
typename DType, PacketArch Arch>
387 template<
int dim,
typename DType, PacketArch Arch>
390 return packet::CheckAlign<Arch>(t.
dptr_) &&
391 packet::CheckAlign<Arch>(t.
stride_ *
sizeof(DType));
394 template<
int dim,
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
400 template<
int dim,
typename OP,
typename TA,
typename TB,
401 typename DType,
int etype, PacketArch Arch>
412 template<
typename SV,
typename E,
int dim,
typename DType, PacketArch Arch>
416 const index_t xlen = packet::LowerAlign<DType, Arch>(dst.
size(1));
419 #pragma omp parallel for 422 for (
index_t x = 0; x < xlen; x += packetSize) {
426 SV::Save(dst[y][x], plan.
Eval(y, x));
432 #endif // MSHADOW_PACKET_INL_H_ MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:263
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:187
ScalarExp< DType > scalar(DType s)
create a scalar expression
Definition: expression.h:104
Definition: packet-inl.h:232
DType * dptr_
pointer to the data
Definition: tensor.h:435
MSHADOW_CINLINE packet::Packet< DType > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:295
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:250
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
void AlignedFree(void *ptr)
free aligned space
Definition: packet-inl.h:103
const TB & rhs_
right operand
Definition: expression.h:340
Definition: packet-inl.h:376
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:298
static bool Check(const E &exp)
Definition: packet-inl.h:377
binary map expression lhs [op] rhs
Definition: expression.h:335
static const index_t value
Definition: packet-inl.h:64
PacketPlan< UnaryMapExp< OP, TA, DType, etype >, DType, Arch > MakePacketPlan(const UnaryMapExp< OP, TA, DType, etype > &e)
Definition: packet-inl.h:325
void * AlignedMallocPitch(size_t *out_pitch, size_t lspace, size_t num_line)
analogous to cudaMallocPitch: allocates an aligned space with num_line * lspace cells
Definition: packet-inl.h:78
Definition: packet-inl.h:43
base class of all rvalues
Definition: expression.h:149
DType scalar_
scalar value
Definition: expression.h:98
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:279
PacketArch
Definition: packet-inl.h:42
PacketPlan(const PacketPlan< TA, DType, Arch > &lhs, const PacketPlan< TB, DType, Arch > &rhs)
Definition: packet-inl.h:277
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &src)
Definition: packet-inl.h:196
header file of tensor data structure and functions. This lib requires explicit memory allocation and d...
device name CPU
Definition: tensor.h:40
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:520
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
evaluate the expression at index [y][x], x will be aligned to Packet<DType, Arch>::Size() ...
MaskExp< IndexExp, SrcExp, DType > mask(const Exp< IndexExp, DType, e1 > &index, const Exp< SrcExp, DType, e2 > &src)
Definition: mask.h:58
definitions of abstract expressions and expressions template
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:171
int32_t index_t
type that will be used for index
Definition: base.h:336
PacketPlan(const PacketPlan< TA, DType, Arch > &src)
Definition: packet-inl.h:294
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:179
static bool Check(const BinaryMapExp< OP, TA, TB, DType, etype > &t)
Definition: packet-inl.h:403
Definition: packet-inl.h:44
support of sse2 packet optimization of some operations
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:213
generic Packet operator
Definition: packet-inl.h:156
PacketPlan(DType scalar)
Definition: packet-inl.h:262
PacketPlan(const Tensor< Device, dim, DType > &t)
Definition: packet-inl.h:245
bool CheckAlign(size_t pitch)
check if a pointer is aligned
Definition: packet-inl.h:113
index_t LowerAlign(index_t size)
get lower bound of aligned index of size
Definition: packet-inl.h:143
Definition: packet-inl.h:63
const TA & src_
source expression
Definition: expression.h:408
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:226
index_t UpperAlign(index_t size)
get upper bound of aligned index of size
Definition: packet-inl.h:130
unary map expression op(src)
Definition: expression.h:405
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:282
scalar expression
Definition: expression.h:96
Definition: packet-inl.h:204
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:247
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:205
const Container & self(void) const
Definition: expression.h:83
const SubType & real_self(void) const
true self of subtype
Definition: expr_engine-inl.h:50
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:163
a general class that allows extension that makes tensors of some shape
Definition: expr_engine-inl.h:44
const TA & lhs_
left operand
Definition: expression.h:338
overloaded + operator between half_t and bf16_t
Definition: base.h:327
MSHADOW_XINLINE index_t size(int idx) const
returns the size of the i-th dimension, counting from the highest dimension
Definition: tensor.h:506
index_t stride_
stores the stride information in the x dimension; this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:442
static bool Check(const UnaryMapExp< OP, TA, DType, etype > &t)
Definition: packet-inl.h:396
general tensor
Definition: tensor.h:421
static bool Check(const Tensor< cpu, dim, DType > &t)
Definition: packet-inl.h:389
void MapPacketPlan(Tensor< cpu, dim, DType > _dst, const expr::PacketPlan< E, DType, Arch > &plan)
use PacketPlan to compute result
Definition: packet-inl.h:413
support of plain packet that uses the plain datatype.
Generic packet type.
Definition: packet-inl.h:60
static bool Check(const ScalarExp< DType > &exp)
Definition: packet-inl.h:383
index_t openmp_index_t
openmp index for linux
Definition: base.h:344
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:266
static check of whether packet is enabled
Definition: packet-inl.h:343