mxnet
packet-inl.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef MSHADOW_PACKET_INL_H_
25 #define MSHADOW_PACKET_INL_H_
26 
27 #if defined(__APPLE__) || defined(__FreeBSD__)
28 #include <stdlib.h>
29 #else
30 #include <malloc.h>
31 #endif
32 #include "./base.h"
33 #include "./tensor.h"
34 #include "./expression.h"
35 
36 
37 namespace mshadow {
39 namespace packet {
40 
41 enum PacketArch {
44 };
45 
46 #if MSHADOW_USE_SSE
47 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kSSE2
48 #else
49 #define MSHADOW_DEFAULT_PACKET ::mshadow::packet::kPlain
50 #endif
51 
52 // whether packet operator is enabled.
58 template<typename DType, PacketArch Arch = MSHADOW_DEFAULT_PACKET>
59 struct Packet;
60 
// Per-architecture alignment constant. This primary template yields 4 for any Arch
// that is not specialized elsewhere.
// Note: within this file the value is consumed as a shift count rather than a byte
// count -- CheckAlign builds its mask as (1 << value) - 1, so 4 means a 16-byte boundary there.
61 template<PacketArch Arch>
62 struct AlignBytes {
63  static const index_t value = 4;
64 };
65 
66 } // namespace packet
67 } // namespace mshadow
68 
69 namespace mshadow {
70 namespace packet {
// Allocates num_line rows of aligned storage and reports the padded row size.
//   out_pitch: receives lspace rounded up to a multiple of (1 << bits).
//   lspace:    requested size of one row; the padded value times num_line is the
//              size handed to the allocator.
//   num_line:  number of rows.
// Returns the allocated pointer. Allocation failure is reported through
// CHECK_EQ / LOG(FATAL).
// NOTE(review): the declaration of `bits` is not present in this listing (listing
// line 80 is absent) -- presumably an AlignBytes<...>::value as in CheckAlign; confirm
// against the real header.
77 inline void* AlignedMallocPitch(size_t *out_pitch,
78  size_t lspace,
79  size_t num_line) {
81  const index_t mask = (1 << bits) - 1;
82 
// Round the row size up to the next multiple of (1 << bits).
83  size_t pitch = ((lspace + mask) >> bits) << bits;
84  *out_pitch = pitch;
// Both allocators are asked for pitch * num_line bytes aligned to (1 << bits).
85 #ifdef _MSC_VER
86  void *res = _aligned_malloc(pitch * num_line, 1 << bits);
87 #else
88  void *res;
89  int ret = posix_memalign(&res, 1 << bits, pitch * num_line);
90  CHECK_EQ(ret, 0) << "AlignedMallocPitch failed";
91 #endif
92  if (res == NULL) {
93  LOG(FATAL) << "AlignedMallocPitch failed";
94  }
// NOTE(review): the `diagnostic pop` after the return has no matching `diagnostic push`
// in this function, and it sits outside the __GNUC__ guard -- confirm this is intended.
95 #if __GNUC__ >= 6
96 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
97 #endif
98  return res;
99 #pragma GCC diagnostic pop
100 }
101 
// Releases `ptr` with the deallocator that pairs with the allocator selected in
// AlignedMallocPitch: _aligned_free when _MSC_VER is defined, free otherwise.
106 inline void AlignedFree(void *ptr) {
107 #ifdef _MSC_VER
108  _aligned_free(ptr);
109 #else
110  free(ptr);
111 #endif
112 }
113 
// Returns true when `pitch` is a multiple of (1 << AlignBytes<Arch>::value),
// i.e. none of its low `bits` bits are set.
115 template<PacketArch Arch>
116 inline bool CheckAlign(size_t pitch) {
117  const index_t bits = AlignBytes<Arch>::value;
// (1 << bits) - 1 is a mask of the low `bits` bits; any set bit means misalignment.
118  return !(pitch & ((1 << bits) - 1));
119 }
120 
// Pointer overload: reinterprets the address as size_t and forwards it to the
// size_t overload of CheckAlign<Arch>.
122 template<PacketArch Arch>
123 inline bool CheckAlign(void *ptr) {
124  return CheckAlign<Arch>(reinterpret_cast<size_t>(ptr));
125 }
126 
// Rounds an element count up: size * sizeof(DType) is rounded up to a multiple of
// (1 << bits), then divided by sizeof(DType) to get back to a count of DType elements.
// NOTE(review): `bits` is not declared in the visible listing (listing line 134 is
// absent) -- presumably AlignBytes<Arch>::value as in CheckAlign; confirm.
132 template<typename DType, PacketArch Arch>
133 inline index_t UpperAlign(index_t size) {
135  const index_t mask = (1 << bits) - 1;
136  const index_t fsize = sizeof(DType);
137  return (((size * fsize + mask) >> bits) << bits) / fsize;
138 }
139 
// Rounds an element count down: size * sizeof(DType) is truncated to a multiple of
// (1 << bits), then divided by sizeof(DType) to get back to a count of DType elements.
// NOTE(review): `bits` is not declared in the visible listing (listing line 147 is
// absent) -- presumably AlignBytes<Arch>::value as in CheckAlign; confirm.
145 template<typename DType, PacketArch Arch>
146 inline index_t LowerAlign(index_t size) {
148  const index_t fsize = sizeof(DType);
149  return (((size * fsize) >> bits) << bits) / fsize;
150 }
151 
// Primary template for packet-level operators. kEnabled is false here, so any
// OP/DType/Arch combination without a specialization is reported as unsupported.
158 template<typename OP, typename DType, PacketArch Arch>
159 struct PacketOp {
160  static const bool kEnabled = false;
161 };
162 // specialization of operators
163 template<typename DType, PacketArch Arch>
164 struct PacketOp<op::plus, DType, Arch> {
165  static const bool kEnabled = true;
167  const Packet<DType, Arch>& rhs) {
168  return lhs + rhs;
169  }
170 };
171 template<typename DType, PacketArch Arch>
172 struct PacketOp<op::minus, DType, Arch> {
173  static const bool kEnabled = true;
175  const Packet<DType, Arch>& rhs) {
176  return lhs - rhs;
177  }
178 };
179 template<typename DType, PacketArch Arch>
180 struct PacketOp<op::mul, DType, Arch> {
181  static const bool kEnabled = true;
183  const Packet<DType, Arch>& rhs) {
184  return lhs * rhs;
185  }
186 };
187 template<typename DType, PacketArch Arch>
188 struct PacketOp<op::div, DType, Arch> {
189  static const bool kEnabled = true;
191  const Packet<DType, Arch>& rhs) {
192  return lhs / rhs;
193  }
194 };
195 
196 template<typename DType, PacketArch Arch>
197 struct PacketOp<op::identity, DType, Arch> {
198  static const bool kEnabled = true;
200  return src;
201  }
202 };
203 
204 
205 // savers to do storage
206 template<typename SV, typename TFloat, PacketArch Arch>
207 struct Saver{
208  MSHADOW_CINLINE static void Save(TFloat *dst, const Packet<TFloat, Arch>& src) {
211  ans.Store(dst);
212  }
213 };
// Specialization for sv::saveto: the packet is written to `dst` directly through
// src.Store(dst), with no combination against what `dst` already holds.
214 template<typename TFloat, PacketArch Arch>
215 struct Saver<sv::saveto, TFloat, Arch> {
216  MSHADOW_CINLINE static void Save(TFloat *dst, const Packet<TFloat, Arch>& src) {
217  src.Store(dst);
218  }
219 };
220 } // namespace packet
221 } // namespace mshadow
222 
223 #include "packet/plain-inl.h"
224 #if MSHADOW_USE_SSE && !defined(__CUDACC__)
225 #include "packet/sse-inl.h"
226 #endif
227 
228 namespace mshadow {
229 namespace expr {
230 
232 
233 // same as plan, but use packet
234 template<typename ExpType, typename DType, PacketArch Arch>
235 class PacketPlan {
236  public:
242  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const;
243 };
244 
245 template <typename Device, int dim, typename DType, PacketArch Arch>
246 class PacketPlan<Tensor<Device, dim, DType>, DType, Arch> {
247  public:
249  :dptr_(t.dptr_), stride_(t.stride_) {}
251  return packet::Packet<DType, Arch>::Load(&dptr_[y * stride_ + x]);
252  }
253  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
254  return dptr_[y * stride_ + x];
255  }
256 
257  private:
258  const DType *dptr_;
259  index_t stride_;
260 };
261 
262 template<typename DType, PacketArch Arch>
263 class PacketPlan<ScalarExp<DType>, DType, Arch> {
264  public:
265  explicit PacketPlan(DType scalar) : scalar_(scalar) {}
267  return packet::Packet<DType, Arch>::Fill(scalar_);
268  }
269  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
270  return scalar_;
271  }
272 
273  private:
274  DType scalar_;
275 };
276 
277 template<typename OP, typename TA, typename TB, int etype, typename DType, PacketArch Arch>
278 class PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch> {
279  public:
281  : lhs_(lhs), rhs_(rhs) {}
283  return packet::PacketOp<OP, DType, Arch>::Map(lhs_.EvalPacket(y, x), rhs_.EvalPacket(y, x));
284  }
285  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
286  return OP::Map(lhs_.Eval(y, x), rhs_.Eval(y, x));
287  }
288 
289  private:
292 };
293 
294 template<typename OP, typename TA, int etype, typename DType, PacketArch Arch>
295 class PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch> {
296  public:
297  PacketPlan(const PacketPlan<TA, DType, Arch> &src) : src_(src) {}
299  return packet::PacketOp<OP, DType, Arch>::Map(src_.EvalPacket(y, x));
300  }
301  MSHADOW_CINLINE DType Eval(index_t y, index_t x) const {
302  return OP::Map(src_.Eval(y, x));
303  }
304 
305  private:
307 };
308 
309 template<PacketArch Arch, typename OP, typename TA, typename TB, typename DType, int etype>
310 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
311 MakePacketPlan(const BinaryMapExp<OP, TA, TB, DType, etype> &e);
312 
313 template<PacketArch Arch, typename DType>
315  return PacketPlan<ScalarExp<DType>, DType, Arch>(e.scalar_);
316 }
317 template<PacketArch Arch, typename T, typename DType>
319  return PacketPlan<T, DType, Arch>(e.self());
320 }
321 template<PacketArch Arch, typename T, int dim, typename DType>
322 inline PacketPlan<T, DType, Arch>
325 }
326 template<PacketArch Arch, typename OP, typename TA, typename DType, int etype>
327 inline PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>
329  return PacketPlan<UnaryMapExp<OP, TA, DType, etype>, DType, Arch>(MakePacketPlan<Arch>(e.src_));
330 }
331 template<PacketArch Arch, typename OP, typename TA, typename TB, typename DType, int etype>
332 inline PacketPlan<BinaryMapExp<OP, TA, TB, DType, etype>, DType, Arch>
335  DType, Arch>(MakePacketPlan<Arch>(e.lhs_), MakePacketPlan<Arch>(e.rhs_));
336 }
337 
// Compile-time flag for an expression/element type E under Arch. The primary
// template sets kPass to false, so anything without a specialization does not pass.
345 template<typename E, PacketArch Arch>
346 struct PacketCheck{
347  static const bool kPass = false;
348 };
// float passes the check for every Arch.
349 template<PacketArch Arch>
350 struct PacketCheck<float, Arch> {
351  static const bool kPass = true;
352 };
// double passes the check for every Arch.
353 template<PacketArch Arch>
354 struct PacketCheck<double, Arch> {
355  static const bool kPass = true;
356 };
357 template<typename DType, PacketArch Arch>
358 struct PacketCheck<ScalarExp<DType>, Arch> {
360 };
361 template<int dim, typename DType, PacketArch Arch>
362 struct PacketCheck<Tensor<cpu, dim, DType>, Arch> {
364 };
365 template<typename OP, typename TA, typename DType, int etype, PacketArch Arch>
366 struct PacketCheck<UnaryMapExp<OP, TA, DType, etype>, Arch> {
367  static const bool kPass = PacketCheck<TA, Arch>::kPass &&
369 };
370 template<typename OP, typename TA, typename TB, typename DType, int etype, PacketArch Arch>
371 struct PacketCheck< BinaryMapExp<OP, TA, TB, DType, etype>, Arch> {
374 };
375 //----------------------------------------------------
376 // Check if data is aligned and allow packet operation
377 //----------------------------------------------------
378 template<int dim, typename E, PacketArch Arch>
380  inline static bool Check(const E &exp) {
381  return false;
382  }
383 };
// Scalar expressions always pass the runtime alignment check; the argument is not inspected.
384 template<int dim, typename DType, PacketArch Arch>
385 struct PacketAlignCheck<dim, ScalarExp<DType>, Arch> {
386  inline static bool Check(const ScalarExp<DType> &exp) {
387  return true;
388  }
389 };
// CPU tensors pass when both the data pointer and the row stride expressed in bytes
// (t.stride_ * sizeof(DType)) satisfy CheckAlign<Arch>.
390 template<int dim, typename DType, PacketArch Arch>
391 struct PacketAlignCheck<dim, Tensor<cpu, dim, DType>, Arch> {
392  inline static bool Check(const Tensor<cpu, dim, DType> &t) {
393  return packet::CheckAlign<Arch>(t.dptr_) &&
394  packet::CheckAlign<Arch>(t.stride_ * sizeof(DType));
395  }
396 };
397 template<int dim, typename OP, typename TA, typename DType, int etype, PacketArch Arch>
398 struct PacketAlignCheck<dim, UnaryMapExp<OP, TA, DType, etype>, Arch> {
399  inline static bool Check(const UnaryMapExp<OP, TA, DType, etype> &t) {
401  }
402 };
403 template<int dim, typename OP, typename TA, typename TB,
404  typename DType, int etype, PacketArch Arch>
405 struct PacketAlignCheck<dim, BinaryMapExp<OP, TA, TB, DType, etype>, Arch> {
406  inline static bool Check(const BinaryMapExp<OP, TA, TB, DType, etype> &t) {
409  }
410 };
411 
// Evaluates `plan` into the destination tensor, one row at a time: each row is written
// in packets over its aligned prefix, and element by element over whatever remains.
// NOTE(review): the first line of the signature (listing line 416) is absent from this
// listing; `_dst` used below is presumably the destination tensor parameter declared there.
415 template<typename SV, typename E, int dim, typename DType, PacketArch Arch>
417  const expr::PacketPlan<E, DType, Arch>& plan) {
// Work on a 2-D view of the destination.
418  Tensor<cpu, 2, DType> dst = _dst.FlatTo2D();
// xlen is the row width rounded down by LowerAlign; columns [0, xlen) go through packets.
419  const index_t xlen = packet::LowerAlign<DType, Arch>(dst.size(1));
420  const size_t packetSize = packet::Packet<DType, Arch>::size;
// Rows are split across OpenMP threads unless compiling with __CUDACC__.
421 #ifndef __CUDACC__
422  #pragma omp parallel for
423 #endif
424  for (openmp_index_t y = 0; y < dst.size(0); ++y) {
// Packet path: advance packetSize columns per step.
425  for (index_t x = 0; x < xlen; x += packetSize) {
426  packet::Saver<SV, DType, Arch>::Save(&dst[y][x], plan.EvalPacket(y, x));
427  }
// Scalar tail: columns [xlen, dst.size(1)) are saved one element at a time via SV::Save.
428  for (index_t x = xlen; x < dst.size(1); ++x) {
429  SV::Save(dst[y][x], plan.Eval(y, x));
430  }
431  }
432 }
433 } // namespace expr
434 } // namespace mshadow
435 #endif // MSHADOW_PACKET_INL_H_
expression.h
definitions of abstract expressions and expressions template
mshadow::expr::PacketPlan< UnaryMapExp< OP, TA, DType, etype >, DType, Arch >::EvalPacket
MSHADOW_CINLINE packet::Packet< DType > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:298
mshadow::openmp_index_t
index_t openmp_index_t
openmp index for linux
Definition: base.h:336
mshadow::packet::kSSE2
@ kSSE2
Definition: packet-inl.h:43
mshadow::expr::Exp< Container, DType, type::kRValue >::self
const Container & self(void) const
Definition: expression.h:82
mshadow::expr::scalar
ScalarExp< DType > scalar(DType s)
create a scalar expression
Definition: expression.h:103
mshadow::expr::PacketPlan< Tensor< Device, dim, DType >, DType, Arch >::EvalPacket
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:250
mshadow::packet::PacketArch
PacketArch
Definition: packet-inl.h:41
mshadow::packet::PacketOp
generic Packet operator
Definition: packet-inl.h:159
mshadow::expr::PacketCheck
static check packet enable
Definition: packet-inl.h:346
mshadow::packet::PacketOp< op::identity, DType, Arch >::Map
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &src)
Definition: packet-inl.h:199
mshadow::packet::PacketOp< op::div, DType, Arch >::Map
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:190
mshadow::packet::Packet
Generic packet type.
Definition: packet-inl.h:59
mshadow::expr::PacketAlignCheck< dim, BinaryMapExp< OP, TA, TB, DType, etype >, Arch >::Check
static bool Check(const BinaryMapExp< OP, TA, TB, DType, etype > &t)
Definition: packet-inl.h:406
mshadow::expr::PacketPlan::Eval
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
mshadow::expr::PacketAlignCheck< dim, UnaryMapExp< OP, TA, DType, etype >, Arch >::Check
static bool Check(const UnaryMapExp< OP, TA, DType, etype > &t)
Definition: packet-inl.h:399
mshadow::Tensor
general tensor
Definition: tensor.h:525
mshadow::packet::LowerAlign
index_t LowerAlign(index_t size)
get lower bound of aligned index of size
Definition: packet-inl.h:146
mshadow::packet::AlignedMallocPitch
void * AlignedMallocPitch(size_t *out_pitch, size_t lspace, size_t num_line)
analogous to cudaMallocPitch: allocates an aligned space with num_line * lspace cells
Definition: packet-inl.h:77
mshadow::packet::PacketOp< op::mul, DType, Arch >::Map
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:182
mshadow::packet::PacketOp::kEnabled
static const bool kEnabled
Definition: packet-inl.h:160
mshadow::expr::PacketPlan< UnaryMapExp< OP, TA, DType, etype >, DType, Arch >::Eval
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:301
mshadow::packet::kPlain
@ kPlain
Definition: packet-inl.h:42
mshadow::packet::UpperAlign
index_t UpperAlign(index_t size)
get upper bound of aligned index of size
Definition: packet-inl.h:133
sse-inl.h
support of sse2 packet optimization of some operations
plain-inl.h
support for the plain packet that uses the plain datatype.
mshadow::expr::PacketPlan< BinaryMapExp< OP, TA, TB, DType, etype >, DType, Arch >::Eval
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:285
mshadow::expr::PacketArch
packet::PacketArch PacketArch
Definition: packet-inl.h:231
mshadow::expr::BinaryMapExp
binary map expression lhs [op] rhs
Definition: expression.h:334
mshadow::cpu
device name CPU
Definition: tensor.h:39
mshadow::expr::PacketPlan< BinaryMapExp< OP, TA, TB, DType, etype >, DType, Arch >::EvalPacket
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:282
tensor.h
header file of tensor data structure and functions This lib requires explicit memory allocation and d...
mshadow::expr::PacketPlan::EvalPacket
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
evaluate the expression at index [y][x], x will be aligned to Packet<DType, Arch>::Size()
mshadow::expr::mask
MaskExp< IndexExp, SrcExp, DType > mask(const Exp< IndexExp, DType, e1 > &index, const Exp< SrcExp, DType, e2 > &src)
Definition: mask.h:57
mshadow::expr::UnaryMapExp::src_
const TA & src_
source expression
Definition: expression.h:407
MSHADOW_CINLINE
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:231
mshadow::packet::PacketOp< op::minus, DType, Arch >::Map
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:174
mshadow::expr::PacketAlignCheck
Definition: packet-inl.h:379
mshadow::expr::PacketPlan< UnaryMapExp< OP, TA, DType, etype >, DType, Arch >::PacketPlan
PacketPlan(const PacketPlan< TA, DType, Arch > &src)
Definition: packet-inl.h:297
mshadow::expr::ScalarExp::scalar_
DType scalar_
scalar value
Definition: expression.h:97
mshadow::expr::PacketPlan< ScalarExp< DType >, DType, Arch >::EvalPacket
MSHADOW_CINLINE packet::Packet< DType, Arch > EvalPacket(index_t y, index_t x) const
Definition: packet-inl.h:266
mshadow::expr::PacketPlan< ScalarExp< DType >, DType, Arch >::PacketPlan
PacketPlan(DType scalar)
Definition: packet-inl.h:265
mshadow::packet::AlignBytes
Definition: packet-inl.h:62
mshadow::expr::PacketPlan
Definition: packet-inl.h:235
mshadow::expr::BinaryMapExp::rhs_
const TB & rhs_
right operand
Definition: expression.h:339
mshadow::index_t
int32_t index_t
type that will be used for index
Definition: base.h:328
mshadow::expr::MakeTensorExp::real_self
const SubType & real_self(void) const
true self of subtype
Definition: expr_engine-inl.h:49
mshadow::expr::PacketCheck::kPass
static const bool kPass
Definition: packet-inl.h:347
mshadow::expr::PacketPlan< Tensor< Device, dim, DType >, DType, Arch >::PacketPlan
PacketPlan(const Tensor< Device, dim, DType > &t)
Definition: packet-inl.h:248
mshadow::expr::RValueExp
base class of all rvalues
Definition: expression.h:148
mshadow::expr::PacketPlan< Tensor< Device, dim, DType >, DType, Arch >::Eval
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:253
mshadow::packet::Saver< sv::saveto, TFloat, Arch >::Save
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:216
mshadow
overloaded + operator between half_t and bf16_t
Definition: base.h:319
mshadow::expr::MakePacketPlan
PacketPlan< BinaryMapExp< OP, TA, TB, DType, etype >, DType, Arch > MakePacketPlan(const BinaryMapExp< OP, TA, TB, DType, etype > &e)
Definition: packet-inl.h:333
mshadow::expr::PacketAlignCheck::Check
static bool Check(const E &exp)
Definition: packet-inl.h:380
mshadow::expr::MakeTensorExp
a general class that allows extension that makes tensors of some shape
Definition: expr_engine-inl.h:43
mshadow::packet::AlignBytes::value
static const index_t value
Definition: packet-inl.h:63
mshadow::expr::PacketAlignCheck< dim, Tensor< cpu, dim, DType >, Arch >::Check
static bool Check(const Tensor< cpu, dim, DType > &t)
Definition: packet-inl.h:392
mshadow::expr::PacketPlan< ScalarExp< DType >, DType, Arch >::Eval
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const
Definition: packet-inl.h:269
mshadow::Tensor::FlatTo2D
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:624
mshadow::expr::BinaryMapExp::lhs_
const TA & lhs_
left operand
Definition: expression.h:337
mshadow::expr::MapPacketPlan
void MapPacketPlan(Tensor< cpu, dim, DType > _dst, const expr::PacketPlan< E, DType, Arch > &plan)
use PacketPlan to compute result
Definition: packet-inl.h:416
mshadow::Tensor::dptr_
DType * dptr_
pointer to the data
Definition: tensor.h:539
mshadow::packet::PacketOp< op::plus, DType, Arch >::Map
static MSHADOW_CINLINE Packet< DType, Arch > Map(const Packet< DType, Arch > &lhs, const Packet< DType, Arch > &rhs)
Definition: packet-inl.h:166
mshadow::packet::Saver::Save
static MSHADOW_CINLINE void Save(TFloat *dst, const Packet< TFloat, Arch > &src)
Definition: packet-inl.h:208
mshadow::packet::Saver
Definition: packet-inl.h:207
mshadow::Tensor::size
MSHADOW_XINLINE index_t size(int idx) const
return size of i-th dimension, start counting from highest dimension
Definition: tensor.h:610
mshadow::expr::PacketAlignCheck< dim, ScalarExp< DType >, Arch >::Check
static bool Check(const ScalarExp< DType > &exp)
Definition: packet-inl.h:386
mshadow::packet::CheckAlign
bool CheckAlign(size_t pitch)
check if a pointer is aligned
Definition: packet-inl.h:116
mshadow::expr::UnaryMapExp
unary map expression op(src)
Definition: expression.h:404
base.h
definitions of base types, operators, macros functions
mshadow::expr::ScalarExp
scalar expression
Definition: expression.h:95
mshadow::expr::PacketPlan< BinaryMapExp< OP, TA, TB, DType, etype >, DType, Arch >::PacketPlan
PacketPlan(const PacketPlan< TA, DType, Arch > &lhs, const PacketPlan< TB, DType, Arch > &rhs)
Definition: packet-inl.h:280
mshadow::Tensor::stride_
index_t stride_
storing the stride information in x dimension this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:546
mshadow::packet::AlignedFree
void AlignedFree(void *ptr)
free aligned space
Definition: packet-inl.h:106