8 #ifndef MSHADOW_HALF2_H_ 9 #define MSHADOW_HALF2_H_ 11 #if (defined(__CUDACC__) && __CUDA_ARCH__ >= 530 && MSHADOW_USE_CUDA && CUDA_VERSION >= 7050) 12 #define MSHADOW_CUDA_HALF2 1 13 #include <cuda_fp16.h> 15 #define MSHADOW_CUDA_HALF2 0 25 #define MSHADOW_HALF2_ASSIGNOP(AOP, OP) \ 26 template<typename T> \ 27 MSHADOW_XINLINE half2_t operator AOP (const T& a) { \ 28 return *this = half2_t(*this OP a); \ 33 #if MSHADOW_CUDA_HALF2 41 #if MSHADOW_CUDA_HALF2 51 #if MSHADOW_CUDA_HALF2 52 half2_ = __half2half2(__int2half_rz(a));
54 half_t2[0] = (half_t)a;
55 half_t2[1] = (half_t)a;
64 #if MSHADOW_CUDA_HALF2 65 return half2_t(__hneg2(half2_));
67 return half2_t(-half_t2[0], -half_t2[1]);
72 #if MSHADOW_CUDA_HALF2 75 half_t2[0] = a.half_t2[0];
76 half_t2[1] = a.half_t2[1];
89 #if MSHADOW_CUDA_HALF2 90 return half2_t(__floats2half2_rn(__low2float(a.half2_) + __low2float(b.half2_),
91 __high2float(a.half2_) + __high2float(b.half2_)));
93 return half2_t(a.half_t2[0] + b.half_t2[0], a.half_t2[1] + b.half_t2[1]);
98 #if MSHADOW_CUDA_HALF2 99 return half2_t(__floats2half2_rn(__low2float(a.half2_) - __low2float(b.half2_),
100 __high2float(a.half2_) - __high2float(b.half2_)));
102 return half2_t(a.half_t2[0] - b.half_t2[0], a.half_t2[1] - b.half_t2[1]);
107 #if MSHADOW_CUDA_HALF2 108 return half2_t(__floats2half2_rn(__low2float(a.half2_) * __low2float(b.half2_),
109 __high2float(a.half2_) * __high2float(b.half2_)));
111 return half2_t(a.half_t2[0] * b.half_t2[0], a.half_t2[1] * b.half_t2[1]);
116 #if MSHADOW_CUDA_HALF2 117 return half2_t(__floats2half2_rn(__low2float(a.half2_) / __low2float(b.half2_),
118 __high2float(a.half2_) / __high2float(b.half2_)));
120 return half2_t(a.half_t2[0] / b.half_t2[0], a.half_t2[1] / b.half_t2[1]);
125 #if MSHADOW_CUDA_HALF2 126 return half2_t(__floats2half2_rn(::fmod(__low2float(a.half2_), __low2float(b.half2_)),
127 ::fmod(__high2float(a.half2_), __high2float(b.half2_))));
129 return half2_t(::fmod(a.half_t2[0], b.half_t2[0]), ::fmod(a.half_t2[1], b.half_t2[1]));
134 #if MSHADOW_CUDA_HALF2 135 return __hbeq2(a.half2_, b.half2_);
137 return (a.half_t2[0] == b.half_t2[0] && a.half_t2[1] == b.half_t2[1]);
143 #endif // MSHADOW_HALF2_H_
class MSHADOW_ALIGNED(2) half_t
Definition: half.h:94
#define MSHADOW_HALF2_ASSIGNOP(AOP, OP)
Definition: half2.h:25
MSHADOW_XINLINE half2_t operator+(half2_t a, half2_t b)
Overloaded + operator for half2_t; adds the two packed half values element-wise.
Definition: half2.h:88
#define MSHADOW_XINLINE
Definition: base.h:204
MSHADOW_XINLINE half2_t operator-(half2_t a, half2_t b)
Overloaded - operator for half2_t; subtracts the two packed half values element-wise.
Definition: half2.h:97
MSHADOW_XINLINE bool operator==(half2_t a, half2_t b)
Overloaded == operator for half2_t; true only when both packed half values compare equal.
Definition: half2.h:133
MSHADOW_XINLINE half2_t operator*(half2_t a, half2_t b)
Overloaded * operator for half2_t; multiplies the two packed half values element-wise.
Definition: half2.h:106
MSHADOW_XINLINE half2_t operator/(half2_t a, half2_t b)
Overloaded / operator for half2_t; divides the two packed half values element-wise.
Definition: half2.h:115
namespace for mshadow
Definition: base.h:282
MSHADOW_XINLINE half2_t operator%(half2_t a, half2_t b)
Overloaded % operator for half2_t; applies ::fmod to each of the two packed half values.
Definition: half2.h:124