7 #ifndef MSHADOW_PACKET_SSE_INL_H_ 8 #define MSHADOW_PACKET_SSE_INL_H_ 10 #include <emmintrin.h> 12 #include "../packet-inl.h" 26 explicit Packet(__m128 data) : data_(data) {}
41 data_ = _mm_set1_ps(s);
46 _mm_store_ps(dst, data_);
50 __m128 ans = _mm_add_ps(data_, _mm_movehl_ps(data_, data_));
51 __m128 rst = _mm_add_ss(ans, _mm_shuffle_ps(ans, ans, 1));
52 #if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) 53 return rst.m128_f32[0];
55 float rr = _mm_cvtss_f32(rst);
71 explicit Packet(__m128d data) : data_(data) {}
85 data_ = _mm_set1_pd(s);
90 _mm_store_pd(dst, data_);
93 inline double Sum(
void)
const {
94 __m128d tmp = _mm_add_sd(data_, _mm_unpackhi_pd(data_, data_));
95 #if defined(_MSC_VER) && (_MSC_VER <= 1500) && defined(_WIN64) 96 return tmp.m128d_f64[0];
98 double ans = _mm_cvtsd_f64(tmp);
147 #endif // MSHADOW_PACKET_SSE_INL_H_ vector real type for float
Definition: sse-inl.h:64
static MSHADOW_CINLINE Packet< float, kSSE2 > Fill(float s)
Definition: sse-inl.h:28
MSHADOW_CINLINE Packet< float, kSSE2 > & operator=(float s)
Definition: sse-inl.h:40
static MSHADOW_CINLINE Packet< float, kSSE2 > LoadUnAligned(const float *src)
Definition: sse-inl.h:36
MSHADOW_CINLINE Packet< DType, kPlain > operator-(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:59
MSHADOW_CINLINE Packet< DType, kPlain > operator/(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:70
MSHADOW_CINLINE void Store(float *dst) const
Definition: sse-inl.h:45
static MSHADOW_CINLINE Packet< double, kSSE2 > Load(const double *src)
Definition: sse-inl.h:77
Packet(__m128 data)
Definition: sse-inl.h:26
__m128d data_
Definition: sse-inl.h:68
MSHADOW_CINLINE float Sum() const
Definition: sse-inl.h:49
int32_t index_t
type that will be used for index
Definition: base.h:291
Packet(__m128d data)
Definition: sse-inl.h:71
MSHADOW_CINLINE Packet< DType, kPlain > operator*(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:64
Definition: packet-inl.h:25
MSHADOW_CINLINE Packet< double, kSSE2 > & operator=(double s)
Definition: sse-inl.h:84
double Sum(void) const
Definition: sse-inl.h:93
__m128 data_
The internal data.
Definition: sse-inl.h:22
MSHADOW_CINLINE Packet< DType, kPlain > operator+(const Packet< DType, kPlain > &lhs, const Packet< DType, kPlain > &rhs)
Definition: plain-inl.h:53
#define MSHADOW_CINLINE
cpu force inline
Definition: base.h:207
static MSHADOW_CINLINE Packet< float, kSSE2 > Load(const float *src)
Definition: sse-inl.h:32
namespace for mshadow
Definition: base.h:282
static MSHADOW_CINLINE Packet< double, kSSE2 > LoadUnAligned(const double *src)
Definition: sse-inl.h:80
Packet(void)
Definition: sse-inl.h:24
static MSHADOW_CINLINE Packet< double, kSSE2 > Fill(double s)
Definition: sse-inl.h:73
MSHADOW_CINLINE void Store(double *dst) const
Definition: sse-inl.h:89
Generic packet type.
Definition: packet-inl.h:41
Packet(void)
Definition: sse-inl.h:70