27 #ifndef MSHADOW_HALF2_H_ 28 #define MSHADOW_HALF2_H_ 30 #if (defined(__CUDACC__) && __CUDA_ARCH__ >= 530 && MSHADOW_USE_CUDA && CUDA_VERSION >= 7050) 31 #define MSHADOW_CUDA_HALF2 1 32 #include <cuda_fp16.h> 34 #define MSHADOW_CUDA_HALF2 0 44 #define MSHADOW_HALF2_ASSIGNOP(AOP, OP) \ 45 template<typename T> \ 46 MSHADOW_XINLINE half2_t operator AOP (const T& a) { \ 47 return *this = half2_t(*this OP a); \ 52 #if MSHADOW_CUDA_HALF2 60 #if MSHADOW_CUDA_HALF2 70 #if MSHADOW_CUDA_HALF2 71 half2_ = __half2half2(__int2half_rz(a));
73 half_t2[0] = (half_t)a;
74 half_t2[1] = (half_t)a;
83 #if MSHADOW_CUDA_HALF2 84 return half2_t(__hneg2(half2_));
86 return half2_t(-half_t2[0], -half_t2[1]);
91 #if MSHADOW_CUDA_HALF2 94 half_t2[0] = a.half_t2[0];
95 half_t2[1] = a.half_t2[1];
108 #if MSHADOW_CUDA_HALF2 109 return half2_t(__floats2half2_rn(__low2float(a.half2_) + __low2float(b.half2_),
110 __high2float(a.half2_) + __high2float(b.half2_)));
112 return half2_t(a.half_t2[0] + b.half_t2[0], a.half_t2[1] + b.half_t2[1]);
117 #if MSHADOW_CUDA_HALF2 118 return half2_t(__floats2half2_rn(__low2float(a.half2_) - __low2float(b.half2_),
119 __high2float(a.half2_) - __high2float(b.half2_)));
121 return half2_t(a.half_t2[0] - b.half_t2[0], a.half_t2[1] - b.half_t2[1]);
126 #if MSHADOW_CUDA_HALF2 127 return half2_t(__floats2half2_rn(__low2float(a.half2_) * __low2float(b.half2_),
128 __high2float(a.half2_) * __high2float(b.half2_)));
130 return half2_t(a.half_t2[0] * b.half_t2[0], a.half_t2[1] * b.half_t2[1]);
135 #if MSHADOW_CUDA_HALF2 136 return half2_t(__floats2half2_rn(__low2float(a.half2_) / __low2float(b.half2_),
137 __high2float(a.half2_) / __high2float(b.half2_)));
139 return half2_t(a.half_t2[0] / b.half_t2[0], a.half_t2[1] / b.half_t2[1]);
144 #if MSHADOW_CUDA_HALF2 145 return half2_t(__floats2half2_rn(::fmod(__low2float(a.half2_), __low2float(b.half2_)),
146 ::fmod(__high2float(a.half2_), __high2float(b.half2_))));
148 return half2_t(::fmod(a.half_t2[0], b.half_t2[0]), ::fmod(a.half_t2[1], b.half_t2[1]));
153 #if MSHADOW_CUDA_HALF2 154 return __hbeq2(a.half2_, b.half2_);
156 return (a.half_t2[0] == b.half_t2[0] && a.half_t2[1] == b.half_t2[1]);
162 #endif // MSHADOW_HALF2_H_ class MSHADOW_ALIGNED(2) half_t
Definition: half.h:113
#define MSHADOW_HALF2_ASSIGNOP(AOP, OP)
Definition: half2.h:44
MSHADOW_XINLINE half2_t operator+(half2_t a, half2_t b)
overloaded + operator for half2_t
Definition: half2.h:107
#define MSHADOW_XINLINE
Definition: base.h:223
MSHADOW_XINLINE half2_t operator-(half2_t a, half2_t b)
overloaded - operator for half2_t
Definition: half2.h:116
MSHADOW_XINLINE bool operator==(half2_t a, half2_t b)
overloaded == operator for half2_t
Definition: half2.h:152
MSHADOW_XINLINE half2_t operator*(half2_t a, half2_t b)
overloaded * operator for half2_t
Definition: half2.h:125
MSHADOW_XINLINE half2_t operator/(half2_t a, half2_t b)
overloaded / operator for half2_t
Definition: half2.h:134
overloaded + operator between half_t and bf16_t
Definition: base.h:327
MSHADOW_XINLINE half2_t operator%(half2_t a, half2_t b)
overloaded % operator for half2_t
Definition: half2.h:143