mxnet
packet-inl.h
Go to the documentation of this file.
1 
6 #ifndef MSHADOW_PACKET_INL_H_
7 #define MSHADOW_PACKET_INL_H_
8 
9 #ifdef __APPLE__
10 #include <stdlib.h>
11 #else
12 #include <malloc.h>
13 #endif
14 #include "./base.h"
15 #include "./tensor.h"
16 #include "./expression.h"
17 
18 
19 namespace mshadow {
21 namespace packet {
22 
// Packet architecture selector, used as the `Arch` template argument
// throughout this file.
// NOTE(review): the enumerator list (listing lines 24-25) is absent from this
// extract. MSHADOW_DEFAULT_PACKET refers to kSSE2 and kPlain, so presumably
// those are the enumerators — confirm against the original header.
23 enum PacketArch {
26 };
27 
28 #if MSHADOW_USE_SSE
29 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kSSE2
30 #else
31 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kPlain
32 #endif
33 
34 // whether packet operator is enabled.
// Generic packet type. Only forward-declared here; no definition appears in
// this listing. `Arch` defaults to MSHADOW_DEFAULT_PACKET.
40 template<typename DType, PacketArch Arch = MSHADOW_DEFAULT_PACKET>
41 struct Packet;
42 
// Alignment constant for a packet architecture.
// Despite the name, CheckAlign reads `value` as a shift count (`1 << bits`),
// so 4 corresponds to a 16-byte alignment boundary rather than 4 bytes.
43 template<PacketArch Arch>
44 struct AlignBytes {
45  static const index_t value = 4;
46 };
47 
48 } // namespace packet
49 } // namespace mshadow
50 
51 namespace mshadow {
52 namespace packet {
// Allocate an aligned buffer of `num_line` rows (analogous to cudaMallocPitch).
//   out_pitch: receives the padded size of one row, i.e. `lspace` rounded up
//              to a multiple of (1 << bits)
//   lspace:    requested size of one row before padding
//   num_line:  number of rows
// Returns a pointer to pitch * num_line units of storage aligned to
// (1 << bits); aborts through CHECK_EQ / LOG(FATAL) when allocation fails.
// NOTE(review): listing line 62, which defines `bits`, is missing from this
// extract.
// NOTE(review): POSIX permits posix_memalign to hand back a null pointer for
// a zero-byte request; that would reach the LOG(FATAL) below even though
// `ret` is 0 — confirm callers never pass lspace == 0 or num_line == 0.
59 inline void* AlignedMallocPitch(size_t *out_pitch,
60  size_t lspace,
61  size_t num_line) {
63  const index_t mask = (1 << bits) - 1;
64 
// round the row size up to the next multiple of (1 << bits)
65  size_t pitch = ((lspace + mask) >> bits) << bits;
66  *out_pitch = pitch;
67 #ifdef _MSC_VER
68  void *res = _aligned_malloc(pitch * num_line, 1 << bits);
69 #else
70  void *res;
71  int ret = posix_memalign(&res, 1 << bits, pitch * num_line);
72  CHECK_EQ(ret, 0) << "AlignedMallocPitch failed";
73 #endif
74  if (res == NULL) {
75  LOG(FATAL) << "AlignedMallocPitch failed";
76  }
77  return res;
78 }
79 
// Free aligned space.
//   ptr: pointer to release; handed to _aligned_free under MSVC and to
//        free() otherwise, matching the two branches of AlignedMallocPitch.
84 inline void AlignedFree(void *ptr) {
85 #ifdef _MSC_VER
86  _aligned_free(ptr);
87 #else
88  free(ptr);
89 #endif
90 }
91 
// Check whether a size/address value is aligned for architecture `Arch`.
// Returns true when the low AlignBytes<Arch>::value bits of `pitch` are all
// zero, i.e. `pitch` is a multiple of (1 << AlignBytes<Arch>::value).
93 template<PacketArch Arch>
94 inline bool CheckAlign(size_t pitch) {
95  const index_t bits = AlignBytes<Arch>::value;
96  return !(pitch & ((1 << bits) - 1));
97 }
98 
// Pointer overload: reinterprets the address as a size_t and forwards to the
// size_t overload of CheckAlign.
100 template<PacketArch Arch>
101 inline bool CheckAlign(void *ptr) {
102  return CheckAlign<Arch>(reinterpret_cast<size_t>(ptr));
103 }
104 
// Get the upper bound of the aligned index of `size`.
// Converts `size` elements to bytes (size * sizeof(DType)), rounds that up to
// a multiple of (1 << bits), and converts back to an element count.
// NOTE(review): listing line 112, which defines `bits`, is missing from this
// extract.
110 template<typename DType, PacketArch Arch>
111 inline index_t UpperAlign(index_t size) {
113  const index_t mask = (1 << bits) - 1;
114  const index_t fsize = sizeof(DType);
115  return (((size * fsize + mask) >> bits) << bits) / fsize;
116 }
117 
// Get the lower bound of the aligned index of `size`.
// Converts `size` elements to bytes (size * sizeof(DType)), rounds that down
// to a multiple of (1 << bits), and converts back to an element count.
// NOTE(review): listing line 125, which defines `bits`, is missing from this
// extract.
123 template<typename DType, PacketArch Arch>
124 inline index_t LowerAlign(index_t size) {
126  const index_t fsize = sizeof(DType);
127  return (((size * fsize) >> bits) << bits) / fsize;
128 }
129 
// Generic packet operator. The primary template marks an operator as not
// packet-enabled (kEnabled == false); the specializations that follow set
// kEnabled to true for the operators they support.
136 template<typename OP, typename DType, PacketArch Arch>
137 struct PacketOp {
138  static const bool kEnabled = false;
139 };
140 // specialization of operators
// Packet operator for op::plus: enabled; Map returns lhs + rhs.
// NOTE(review): listing line 144 (the start of the Map signature, which
// introduces `lhs`) is missing from this extract.
141 template<typename DType, PacketArch Arch>
142 struct PacketOp<op::plus, DType, Arch> {
143  static const bool kEnabled = true;
145  const Packet<DType, Arch>& rhs) {
146  return lhs + rhs;
147  }
148 };
// Packet operator for op::minus: enabled; Map returns lhs - rhs.
// NOTE(review): listing line 152 (the start of the Map signature, which
// introduces `lhs`) is missing from this extract.
149 template<typename DType, PacketArch Arch>
150 struct PacketOp<op::minus, DType, Arch> {
151  static const bool kEnabled = true;
153  const Packet<DType, Arch>& rhs) {
154  return lhs - rhs;
155  }
156 };
// Packet operator for op::mul: enabled; Map returns lhs * rhs.
// NOTE(review): listing line 160 (the start of the Map signature, which
// introduces `lhs`) is missing from this extract.
157 template<typename DType, PacketArch Arch>
158 struct PacketOp<op::mul, DType, Arch> {
159  static const bool kEnabled = true;
161  const Packet<DType, Arch>& rhs) {
162  return lhs * rhs;
163  }
164 };
// Packet operator for op::div: enabled; Map returns lhs / rhs.
// NOTE(review): listing line 168 (the start of the Map signature, which
// introduces `lhs`) is missing from this extract.
165 template<typename DType, PacketArch Arch>
166 struct PacketOp<op::div, DType, Arch> {
167  static const bool kEnabled = true;
169  const Packet<DType, Arch>& rhs) {
170  return lhs / rhs;
171  }
172 };
173 
// Packet operator for op::identity: enabled; Map returns its argument
// unchanged.
// NOTE(review): listing line 177 (the single-argument Map signature that
// introduces `src`) is missing from this extract.
174 template<typename DType, PacketArch Arch>
175 struct PacketOp<op::identity, DType, Arch> {
176  static const bool kEnabled = true;
178  return src;
179  }
180 };
181 
182 
183 // savers to do storage
// Generic packet saver for a store policy SV: writes a packet named `ans`
// to `dst`.
// NOTE(review): listing lines 187-188, which construct `ans`, are missing
// from this extract; presumably they combine the current contents of `dst`
// with `src` according to SV — confirm against the original header.
184 template<typename SV, typename TFloat, PacketArch Arch>
185 struct Saver{
186  MSHADOW_CINLINE static void Save(TFloat *dst, const Packet<TFloat, Arch>& src) {
189  ans.Store(dst);
190  }
191 };
// Saver specialization for sv::saveto: plain assignment, so `src` is stored
// straight to `dst` without reading the destination first.
192 template<typename TFloat, PacketArch Arch>
193 struct Saver<sv::saveto, TFloat, Arch> {
194  MSHADOW_CINLINE static void Save(TFloat *dst, const Packet<TFloat, Arch>& src) {
195  src.Store(dst);
196  }
197 };
198 } // namespace packet
199 } // namespace mshadow
200 
201 #include "packet/plain-inl.h"
202 #if MSHADOW_USE_SSE && !defined(__CUDACC__)
203 #include "packet/sse-inl.h"
204 #endif
205 
206 namespace mshadow {
207 namespace expr {
208 
210 
211 // same as plan, but use packet
// Primary PacketPlan template: declares the evaluation interface that the
// specializations below implement. Eval returns a single element at [y][x].
// NOTE(review): listing lines 215-219 are missing from this extract; the
// specializations below each define an EvalPacket(y, x) member, so its
// declaration presumably sits on those lines.
212 template<typename ExpType, typename DType, PacketArch Arch>
213 class PacketPlan {
214  public:
220  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const;
221 };
222 
// PacketPlan for a Tensor: keeps the tensor's data pointer and row stride and
// addresses element [y][x] as dptr_[y * stride_ + x].
// EvalPacket loads a packet starting at that address; Eval reads one element.
// NOTE(review): listing lines 226 (constructor signature taking `t`) and 228
// (EvalPacket signature) are missing from this extract.
223 template <typename Device, int dim, typename DType, PacketArch Arch>
224 class PacketPlan<Tensor<Device, dim, DType>, DType, Arch> {
225  public:
227  :dptr_(t.dptr_), stride_(t.stride_) {}
229  return packet::Packet<DType, Arch>::Load(&dptr_[y * stride_ + x]);
230  }
231  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
232  return dptr_[y * stride_ + x];
233  }
234 
235  private:
236  const DType *dptr_;
237  index_t stride_;
238 };
239 
// PacketPlan for a scalar expression: stores the scalar by value.
// EvalPacket returns Packet::Fill(scalar_); Eval returns the scalar itself.
// Both ignore the (y, x) position.
// NOTE(review): listing line 244 (EvalPacket signature) is missing from this
// extract.
240 template<typename DType, PacketArch Arch>
241 class PacketPlan<ScalarExp<DType>, DType, Arch> {
242  public:
243  explicit PacketPlan(DType scalar) : scalar_(scalar) {}
245  return packet::Packet<DType, Arch>::Fill(scalar_);
246  }
247  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
248  return scalar_;
249  }
250 
251  private:
252  DType scalar_;
253 };
254 
// PacketPlan for a binary map expression `lhs [op] rhs`.
// EvalPacket combines the operands' packets through PacketOp<OP>::Map;
// Eval combines their scalar values through OP::Map.
// NOTE(review): listing lines 258 (constructor signature), 260 (EvalPacket
// signature) and 268-269 (declarations of lhs_ and rhs_) are missing from
// this extract.
255 template<typename OP, typename TA, typename TB, int etype, typename DType, PacketArch Arch>
256 class PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch> {
257  public:
259  : lhs_(lhs), rhs_(rhs) {}
261  return packet::PacketOp<OP, DType, Arch>::Map(lhs_.EvalPacket(y, x), rhs_.EvalPacket(y, x));
262  }
263  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
264  return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
265  }
266 
267  private:
270 };
271 
// PacketPlan for a unary map expression `op(src)`.
// EvalPacket applies PacketOp<OP>::Map to the source packet; Eval applies
// OP::Map to the source's scalar value.
// NOTE(review): listing lines 276 (EvalPacket signature) and 284 (declaration
// of src_) are missing from this extract.
272 template<typename OP, typename TA, int etype, typename DType, PacketArch Arch>
273 class PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch> {
274  public:
275  PacketPlan(const PacketPlan<TA, DType, Arch> &src) : src_(src) {}
277  return packet::PacketOp<OP, DType, Arch>::Map(src_.EvalPacket(y, x));
278  }
279  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
280  return OP::Map(src_.Eval(y, x));
281  }
282 
283  private:
285 };
286 
// MakePacketPlan overload set: builds the PacketPlan that matches an
// expression. The visible bodies cover a scalar expression (wraps
// e.scalar_), an expression exposing self() (wraps e.self()), a unary map
// (recurses into e.src_) and a binary map (recurses into e.lhs_ and e.rhs_).
// NOTE(review): most signature lines of these overloads (listing lines
// 288-289, 292, 296, 300-302, 305-306 and 311) are missing from this extract,
// so the parameter types and the body of the overload ending at listing line
// 303 cannot be read from here.
287 template<PacketArch Arch, typename OP, typename TA, typename TB, typename DType, int etype>
290 
291 template<PacketArch Arch, typename DType>
293  return PacketPlan<ScalarExp<DType>, DType, Arch>(e.scalar_);
294 }
295 template<PacketArch Arch, typename T, typename DType>
297  return PacketPlan<T, DType, Arch>(e.self());
298 }
299 template<PacketArch Arch, typename T, int dim, typename DType>
303 }
304 template<PacketArch Arch, typename OP, typename TA, typename DType, int etype>
307  return PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>(MakePacketPlan<Arch>(e.src_));
308 }
309 template<PacketArch Arch, typename OP, typename TA, typename TB, typename DType, int etype>
310 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
312  return PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>,
313  DType, Arch>(MakePacketPlan<Arch>(e.lhs_), MakePacketPlan<Arch>(e.rhs_));
314 }
315 
// Compile-time check of whether an expression type can be evaluated with
// packets. kPass is false by default and is switched on by the
// specializations below.
323 template<typename E, PacketArch Arch>
324 struct PacketCheck{
325  static const bool kPass = false;
326 };
// float and double are the element types that pass.
327 template<PacketArch Arch>
328 struct PacketCheck<float, Arch> {
329  static const bool kPass = true;
330 };
331 template<PacketArch Arch>
332 struct PacketCheck<double, Arch> {
333  static const bool kPass = true;
334 };
// Scalar expressions and cpu tensors pass exactly when their element type does.
335 template<typename DType, PacketArch Arch>
336 struct PacketCheck<ScalarExp<DType>, Arch> {
337  static const bool kPass = PacketCheck<DType, Arch>::kPass;
338 };
339 template<int dim, typename DType, PacketArch Arch>
340 struct PacketCheck<Tensor<cpu, dim, DType>, Arch> {
341  static const bool kPass = PacketCheck<DType, Arch>::kPass;
342 };
// Unary map: requires the operand to pass, and-ed with a further condition.
// NOTE(review): listing line 346 (the rest of this expression) is missing
// from this extract.
343 template<typename OP, typename TA, typename DType, int etype, PacketArch Arch>
344 struct PacketCheck<UnaryMapExp<OP, TA, DType, etype>, Arch> {
345  static const bool kPass = PacketCheck<TA, Arch>::kPass &&
347 };
// Binary map: requires the operator to be packet-enabled, and-ed with a
// further condition.
// NOTE(review): listing line 351 (the rest of this expression) is missing
// from this extract.
348 template<typename OP, typename TA, typename TB, typename DType, int etype, PacketArch Arch>
349 struct PacketCheck< BinaryMapExp<OP, TA, TB, DType, etype>, Arch> {
350  static const bool kPass = packet::PacketOp<OP, DType, Arch>::kEnabled &&
352 };
353 //----------------------------------------------------
354 // Check if data is aligned and allow packet operation
355 //----------------------------------------------------
// Runtime check of whether the data behind an expression is aligned well
// enough for packet operation. The generic case reports false.
// NOTE(review): listing line 357 (the `struct PacketAlignCheck {` header of
// the primary template) is missing from this extract.
356 template<int dim, typename E, PacketArch Arch>
358  inline static bool Check(const E &exp) {
359  return false;
360  }
361 };
// A scalar expression always passes.
362 template<int dim, typename DType, PacketArch Arch>
363 struct PacketAlignCheck<dim, ScalarExp<DType>, Arch> {
364  inline static bool Check(const ScalarExp<DType> &exp) {
365  return true;
366  }
367 };
// A cpu tensor passes when both its data pointer and its row stride in bytes
// (stride_ * sizeof(DType)) satisfy CheckAlign.
368 template<int dim, typename DType, PacketArch Arch>
369 struct PacketAlignCheck<dim, Tensor<cpu, dim, DType>, Arch> {
370  inline static bool Check(const Tensor<cpu, dim, DType> &t) {
371  return packet::CheckAlign<Arch>(t.dptr_) &&
372  packet::CheckAlign<Arch>(t.stride_ * sizeof(DType));
373  }
374 };
// Unary map expression.
// NOTE(review): listing line 378 (the body of Check) is missing from this
// extract.
375 template<int dim, typename OP, typename TA, typename DType, int etype, PacketArch Arch>
376 struct PacketAlignCheck<dim, UnaryMapExp<OP, TA, DType, etype>, Arch> {
377  inline static bool Check(const UnaryMapExp<OP, TA, DType, etype> &t) {
379  }
380 };
// Binary map expression.
// NOTE(review): listing lines 385-386 (the body of Check) are missing from
// this extract.
381 template<int dim, typename OP, typename TA, typename TB,
382  typename DType, int etype, PacketArch Arch>
383 struct PacketAlignCheck<dim, BinaryMapExp<OP, TA, TB, DType, etype>, Arch> {
384  inline static bool Check(const BinaryMapExp<OP, TA, TB, DType, etype> &t) {
387  }
388 };
389 
// Use a PacketPlan to compute the result and store it into the destination
// tensor with save policy SV.
//   _dst: destination tensor; flattened to 2-D before evaluation
//   plan: packet plan supplying EvalPacket(y, x) and Eval(y, x)
// NOTE(review): listing line 394 (the first line of the signature, which
// declares `_dst`) is missing from this extract.
393 template<typename SV, typename E, int dim, typename DType, PacketArch Arch>
395  const expr::PacketPlan<E, DType, Arch>& plan) {
396  Tensor<cpu, 2, DType> dst = _dst.FlatTo2D();
// xlen: the prefix of each row that is covered by the packet loop
397  const index_t xlen = packet::LowerAlign<DType, Arch>(dst.size(1));
398  const size_t packetSize = packet::Packet<DType, Arch>::size;
// rows are distributed over OpenMP threads unless compiling with __CUDACC__
399 #ifndef __CUDACC__
400  #pragma omp parallel for
401 #endif
402  for (openmp_index_t y = 0; y < dst.size(0); ++y) {
// packet path: one packet of packetSize elements per step
403  for (index_t x = 0; x < xlen; x += packetSize) {
404  packet::Saver<SV, DType, Arch>::Save(&dst[y][x], plan.EvalPacket(y, x));
405  }
// scalar tail: the remaining elements from xlen to the end of the row
406  for (index_t x = xlen; x < dst.size(1); ++x) {
407  SV::Save(dst[y][x], plan.Eval(y, x));
408  }
409  }
410 }
411 } // namespace expr
412 } // namespace mshadow
413 #endif // MSHADOW_PACKET_INL_H_
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:244
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:168
ScalarExp< DType > scalar(DType s)
create a scalar expression
Definition: expression.h:85
Definition: packet-inl.h:213
DType * dptr_
pointer to the data
Definition: tensor.h:416
MSHADOW_CINLINE packet::Packet< DType > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:276
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:231
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
void AlignedFree(void *ptr)
free aligned space
Definition: packet-inl.h:84
const TB & rhs_
right operand
Definition: expression.h:321
Definition: packet-inl.h:357
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:279
static bool Check(const E &exp)
Definition: packet-inl.h:358
binary map expression lhs [op] rhs
Definition: expression.h:316
static const index_t value
Definition: packet-inl.h:45
PacketPlan< UnaryMapExp< OP, TA, DType, etype >, DType, Arch > MakePacketPlan(const UnaryMapExp< OP, TA, DType, etype > &e)
Definition: packet-inl.h:306
void * AlignedMallocPitch(size_t *out_pitch, size_t lspace, size_t num_line)
analogous to cudaMallocPitch: allocate an aligned space with num_line * lspace cells
Definition: packet-inl.h:59
Definition: packet-inl.h:24
base class of all rvalues
Definition: expression.h:130
DType scalar_
scalar value
Definition: expression.h:79
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:260
PacketArch
Definition: packet-inl.h:23
PacketPlan(const PacketPlan< TA, DType, Arch > &lhs, const PacketPlan< TB, DType, Arch > &rhs)
Definition: packet-inl.h:258
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &src)
Definition: packet-inl.h:177
device name CPU
Definition: tensor.h:21
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimensions, collapsing the higher dimensions together
Definition: tensor.h:501
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
evaluate the expression at index [y][x], x will be aligned to Packet<DType, Arch>::Size() ...
MaskExp< IndexExp, SrcExp, DType > mask(const Exp< IndexExp, DType, e1 > &index, const Exp< SrcExp, DType, e2 > &src)
Definition: mask.h:39
definitions of abstract expressions and expressions template
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:152
int32_t index_t
type that will be used for index
Definition: base.h:291
PacketPlan(const PacketPlan< TA, DType, Arch > &src)
Definition: packet-inl.h:275
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:160
static bool Check(const BinaryMapExp< OP, TA, TB, DType, etype > &t)
Definition: packet-inl.h:384
Definition: packet-inl.h:25
support of sse2 packet optimization of some operations
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:194
generic Packet operator
Definition: packet-inl.h:137
PacketPlan(DType scalar)
Definition: packet-inl.h:243
PacketPlan(const Tensor< Device, dim, DType > &t)
Definition: packet-inl.h:226
bool CheckAlign(size_t pitch)
check if a pointer is aligned
Definition: packet-inl.h:94
index_t LowerAlign(index_t size)
get lower bound of aligned index of size
Definition: packet-inl.h:124
Definition: packet-inl.h:44
const TA & src_
source expression
Definition: expression.h:389
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:207
index_t UpperAlign(index_t size)
get upper bound of aligned index of size
Definition: packet-inl.h:111
unary map expression op(src)
Definition: expression.h:386
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:263
scalar expression
Definition: expression.h:77
Definition: packet-inl.h:185
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:228
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:186
const Container & self(void) const
Definition: expression.h:64
const SubType & real_self(void) const
true self of subtype
Definition: expr_engine-inl.h:31
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:144
a general class that allows extension that makes tensors of some shape
Definition: expr_engine-inl.h:25
const TA & lhs_
left operand
Definition: expression.h:319
namespace for mshadow
Definition: base.h:282
MSHADOW_XINLINE index_t size(int idx) const
return size of i-th dimension, start counting from highest dimension
Definition: tensor.h:487
index_t stride_
stores the stride information in the x dimension; this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:423
static bool Check(const UnaryMapExp< OP, TA, DType, etype > &t)
Definition: packet-inl.h:377
general tensor
Definition: tensor.h:402
static bool Check(const Tensor< cpu, dim, DType > &t)
Definition: packet-inl.h:370
void MapPacketPlan(Tensor< cpu, dim, DType > _dst, const expr::PacketPlan< E, DType, Arch > &plan)
use PacketPlan to compute result
Definition: packet-inl.h:394
support of plain packet that use the plain datatype.
Generic packet type.
Definition: packet-inl.h:41
static bool Check(const ScalarExp< DType > &exp)
Definition: packet-inl.h:364
index_t openmp_index_t
openmp index for linux
Definition: base.h:299
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:247
static check of whether packet operation is enabled
Definition: packet-inl.h:324