6 #ifndef MSHADOW_PACKET_INL_H_ 7 #define MSHADOW_PACKET_INL_H_ 29 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kSSE2 31 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kPlain 40 template<
typename DType, PacketArch Arch = MSHADOW_DEFAULT_PACKET>
43 template<PacketArch Arch>
65 size_t pitch = ((lspace +
mask) >> bits) << bits;
68 void *res = _aligned_malloc(pitch * num_line, 1 << bits);
71 int ret = posix_memalign(&res, 1 << bits, pitch * num_line);
72 CHECK_EQ(ret, 0) <<
"AlignedMallocPitch failed";
75 LOG(FATAL) <<
"AlignedMallocPitch failed";
93 template<PacketArch Arch>
96 return !(pitch & ((1 << bits) - 1));
100 template<PacketArch Arch>
102 return CheckAlign<Arch>(
reinterpret_cast<size_t>(ptr));
110 template<
typename DType, PacketArch Arch>
114 const index_t fsize =
sizeof(DType);
115 return (((size * fsize + mask) >> bits) << bits) / fsize;
123 template<
typename DType, PacketArch Arch>
126 const index_t fsize =
sizeof(DType);
127 return (((size * fsize) >> bits) << bits) / fsize;
136 template<
typename OP,
typename DType, PacketArch Arch>
138 static const bool kEnabled =
false;
141 template<
typename DType, PacketArch Arch>
143 static const bool kEnabled =
true;
149 template<
typename DType, PacketArch Arch>
151 static const bool kEnabled =
true;
157 template<
typename DType, PacketArch Arch>
159 static const bool kEnabled =
true;
165 template<
typename DType, PacketArch Arch>
167 static const bool kEnabled =
true;
174 template<
typename DType, PacketArch Arch>
176 static const bool kEnabled =
true;
184 template<
typename SV,
typename TFloat, PacketArch Arch>
192 template<
typename TFloat, PacketArch Arch>
193 struct Saver<sv::saveto, TFloat, Arch> {
202 #if MSHADOW_USE_SSE && !defined(__CUDACC__) 212 template<
typename ExpType,
typename DType, PacketArch Arch>
223 template <
typename Device,
int dim,
typename DType, PacketArch Arch>
227 :dptr_(t.dptr_), stride_(t.stride_) {}
232 return dptr_[y * stride_ + x];
240 template<
typename DType, PacketArch Arch>
255 template<
typename OP,
typename TA,
typename TB,
int etype,
typename DType, PacketArch Arch>
259 : lhs_(lhs), rhs_(rhs) {}
264 return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
272 template<
typename OP,
typename TA,
int etype,
typename DType, PacketArch Arch>
280 return OP::Map(src_.Eval(y, x));
287 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
291 template<PacketArch Arch,
typename DType>
295 template<PacketArch Arch,
typename T,
typename DType>
299 template<PacketArch Arch,
typename T,
int dim,
typename DType>
304 template<PacketArch Arch,
typename OP,
typename TA,
typename DType,
int etype>
307 return PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>(MakePacketPlan<Arch>(e.
src_));
309 template<PacketArch Arch,
typename OP,
typename TA,
typename TB,
typename DType,
int etype>
310 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
312 return PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>,
313 DType, Arch>(MakePacketPlan<Arch>(e.
lhs_), MakePacketPlan<Arch>(e.
rhs_));
323 template<
typename E, PacketArch Arch>
325 static const bool kPass =
false;
327 template<PacketArch Arch>
329 static const bool kPass =
true;
331 template<PacketArch Arch>
333 static const bool kPass =
true;
335 template<
typename DType, PacketArch Arch>
339 template<
int dim,
typename DType, PacketArch Arch>
343 template<
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
348 template<
typename OP,
typename TA,
typename TB,
typename DType,
int etype, PacketArch Arch>
356 template<
int dim,
typename E, PacketArch Arch>
358 inline static bool Check(
const E &exp) {
362 template<
int dim,
typename DType, PacketArch Arch>
368 template<
int dim,
typename DType, PacketArch Arch>
371 return packet::CheckAlign<Arch>(t.
dptr_) &&
372 packet::CheckAlign<Arch>(t.
stride_ *
sizeof(DType));
375 template<
int dim,
typename OP,
typename TA,
typename DType,
int etype, PacketArch Arch>
381 template<
int dim,
typename OP,
typename TA,
typename TB,
382 typename DType,
int etype, PacketArch Arch>
393 template<
typename SV,
typename E,
int dim,
typename DType, PacketArch Arch>
397 const index_t xlen = packet::LowerAlign<DType, Arch>(dst.
size(1));
400 #pragma omp parallel for 403 for (
index_t x = 0; x < xlen; x += packetSize) {
407 SV::Save(dst[y][x], plan.
Eval(y, x));
413 #endif // MSHADOW_PACKET_INL_H_ MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:244
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:168
ScalarExp< DType > scalar(DType s)
create a scalar expression
Definition: expression.h:85
Definition: packet-inl.h:213
DType * dptr_
pointer to the data
Definition: tensor.h:416
MSHADOW_CINLINE packet::Packet< DType > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:276
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:231
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
void AlignedFree(void *ptr)
free aligned space
Definition: packet-inl.h:84
const TB & rhs_
right operand
Definition: expression.h:321
Definition: packet-inl.h:357
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:279
static bool Check(const E &exp)
Definition: packet-inl.h:358
binary map expression lhs [op] rhs
Definition: expression.h:316
static const index_t value
Definition: packet-inl.h:45
PacketPlan< UnaryMapExp< OP, TA, DType, etype >, DType, Arch > MakePacketPlan(const UnaryMapExp< OP, TA, DType, etype > &e)
Definition: packet-inl.h:306
void * AlignedMallocPitch(size_t *out_pitch, size_t lspace, size_t num_line)
analogous to cudaMallocPitch: allocate an aligned space with num_line * lspace cells
Definition: packet-inl.h:59
Definition: packet-inl.h:24
base class of all rvalues
Definition: expression.h:130
DType scalar_
scalar value
Definition: expression.h:79
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:260
PacketArch
Definition: packet-inl.h:23
PacketPlan(const PacketPlan< TA, DType, Arch > &lhs, const PacketPlan< TB, DType, Arch > &rhs)
Definition: packet-inl.h:258
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &src)
Definition: packet-inl.h:177
device name CPU
Definition: tensor.h:21
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:501
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
evaluate the expression at index [y][x], x will be aligned to Packet<DType, Arch>::Size() ...
MaskExp< IndexExp, SrcExp, DType > mask(const Exp< IndexExp, DType, e1 > &index, const Exp< SrcExp, DType, e2 > &src)
Definition: mask.h:39
definitions of abstract expressions and expression templates
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:152
int32_t index_t
type that will be used for index
Definition: base.h:291
PacketPlan(const PacketPlan< TA, DType, Arch > &src)
Definition: packet-inl.h:275
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:160
static bool Check(const BinaryMapExp< OP, TA, TB, DType, etype > &t)
Definition: packet-inl.h:384
Definition: packet-inl.h:25
support for SSE2 packet optimization of some operations
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:194
generic Packet operator
Definition: packet-inl.h:137
PacketPlan(DType scalar)
Definition: packet-inl.h:243
PacketPlan(const Tensor< Device, dim, DType > &t)
Definition: packet-inl.h:226
bool CheckAlign(size_t pitch)
check if a pointer is aligned
Definition: packet-inl.h:94
index_t LowerAlign(index_t size)
get lower bound of aligned index of size
Definition: packet-inl.h:124
Definition: packet-inl.h:44
const TA & src_
source expression
Definition: expression.h:389
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:207
index_t UpperAlign(index_t size)
get upper bound of aligned index of size
Definition: packet-inl.h:111
unary map expression op(src)
Definition: expression.h:386
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:263
scalar expression
Definition: expression.h:77
Definition: packet-inl.h:185
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:228
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:186
const Container & self(void) const
Definition: expression.h:64
const SubType & real_self(void) const
true self of subtype
Definition: expr_engine-inl.h:31
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:144
a general class that allows extension that makes tensors of some shape
Definition: expr_engine-inl.h:25
const TA & lhs_
left operand
Definition: expression.h:319
namespace for mshadow
Definition: base.h:282
MSHADOW_XINLINE index_t size(int idx) const
return the size of the i-th dimension, counting from the highest dimension
Definition: tensor.h:487
index_t stride_
storing the stride information in x dimension; this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:423
static bool Check(const UnaryMapExp< OP, TA, DType, etype > &t)
Definition: packet-inl.h:377
general tensor
Definition: tensor.h:402
static bool Check(const Tensor< cpu, dim, DType > &t)
Definition: packet-inl.h:370
void MapPacketPlan(Tensor< cpu, dim, DType > _dst, const expr::PacketPlan< E, DType, Arch > &plan)
use PacketPlan to compute result
Definition: packet-inl.h:394
support of plain packet that uses the plain datatype.
Generic packet type.
Definition: packet-inl.h:41
static bool Check(const ScalarExp< DType > &exp)
Definition: packet-inl.h:364
index_t openmp_index_t
openmp index for linux
Definition: base.h:299
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:247
static check packet enable
Definition: packet-inl.h:324