26 #ifndef MSHADOW_STREAM_GPU_INL_H_ 27 #define MSHADOW_STREAM_GPU_INL_H_ 31 #include "./logging.h" 34 #if MSHADOW_USE_CUDA == 1 49 #if MSHADOW_USE_CUSOLVER == 1 53 #if MSHADOW_USE_CUDNN == 1 54 cudnnHandle_t dnn_handle_;
73 , blas_handle_ownership_(NoHandle)
74 , solver_handle_ownership_(NoHandle)
75 , dnn_handle_ownership_(NoHandle) {}
88 cudaError_t err = cudaStreamQuery(stream_);
89 if (err == cudaSuccess)
return true;
90 if (err == cudaErrorNotReady)
return false;
91 LOG(FATAL) << cudaGetErrorString(err);
100 #if MSHADOW_FORCE_STREAM 101 LOG(FATAL) <<
"Default GPU stream was used when MSHADOW_FORCE_STREAM was on";
113 if (stream == NULL) {
117 <<
"No handle exist in source stream";
123 if (blas_handle_ownership_ == OwnHandle) {
124 cublasStatus_t err = cublasDestroy(blas_handle_);
125 blas_handle_ownership_ = NoHandle;
126 CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) <<
"Destory cublas handle failed";
131 this->DestroyBlasHandle();
132 cublasStatus_t err = cublasCreate(&blas_handle_);
133 blas_handle_ownership_ = OwnHandle;
134 CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) <<
"Create cublas handle failed";
135 err = cublasSetStream(blas_handle_, stream_);
136 CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) <<
"Setting cublas stream failed";
138 #if MSHADOW_USE_CUSOLVER == 1 140 if (stream == NULL) {
149 #if MSHADOW_USE_CUSOLVER == 1 150 if (solver_handle_ownership_ == OwnHandle) {
151 cusolverStatus_t err = cusolverDnDestroy(solver_handle_);
152 CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) <<
"Destory cusolver handle failed";
157 #if MSHADOW_USE_CUSOLVER == 1 158 this->DestroySolverHandle();
159 cusolverStatus_t err = cusolverDnCreate(&solver_handle_);
160 CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) <<
"Create cusolver handle failed";
161 err = cusolverDnSetStream(solver_handle_, stream_);
162 CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) <<
"Setting cusolver stream failed";
163 this->solver_handle_ownership_ = OwnHandle;
167 #if MSHADOW_USE_CUDNN == 1 168 inline static cudnnHandle_t GetDnnHandle(
Stream<gpu> *stream) {
169 if (stream == NULL) {
173 return stream->dnn_handle_;
179 #if MSHADOW_USE_CUDNN == 1 180 if (dnn_handle_ownership_ == OwnHandle) {
181 cudnnStatus_t err = cudnnDestroy(dnn_handle_);
182 this->dnn_handle_ownership_ = NoHandle;
183 CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
189 #if MSHADOW_USE_CUDNN == 1 190 this->DestroyDnnHandle();
191 cudnnStatus_t err = cudnnCreate(&dnn_handle_);
192 CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
194 this->dnn_handle_ownership_ = OwnHandle;
195 err = cudnnSetStream(dnn_handle_, stream_);
196 CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
204 stream->DestroyBlasHandle();
205 stream->DestroySolverHandle();
206 stream->DestroyDnnHandle();
212 bool create_dnn_handle,
216 std::unique_ptr<Stream<gpu>, StreamDeleter> st(
new Stream<gpu>());
218 if (create_blas_handle) {
219 st->CreateBlasHandle();
220 st->CreateSolverHandle();
222 if (create_dnn_handle) {
223 st->CreateDnnHandle();
233 #endif // MSHADOW_STREAM_GPU_INL_H_ static cudaStream_t GetStream(Stream< gpu > *stream)
returns actual cudaStream_t given an input GPU stream pointer
Definition: stream_gpu-inl.h:98
HandleState dnn_handle_ownership_
cudnn handle ownership
Definition: stream_gpu-inl.h:61
Definition: stream_gpu-inl.h:38
bool CheckIdle(void)
query whether the stream is idle
Definition: stream_gpu-inl.h:87
static cusolverDnHandle_t GetSolverHandle(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:139
HandleState
handle state
Definition: stream_gpu-inl.h:40
Stream(void)
Definition: stream_gpu-inl.h:67
Stream< gpu > * NewStream< gpu >(bool create_blas_handle, bool create_dnn_handle, int dev_id)
Definition: stream_gpu-inl.h:211
void DestroySolverHandle()
Definition: stream_gpu-inl.h:148
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:271
cudaDeviceProp prop
cudaDeviceProp
Definition: stream_gpu-inl.h:63
header file of tensor data structure and functions This lib requires explicit memory allocation and d...
device name GPU
Definition: tensor.h:47
HandleState blas_handle_ownership_
cublas handle ownership
Definition: stream_gpu-inl.h:57
int dev_id
dev id
Definition: stream_gpu-inl.h:65
HandleState solver_handle_ownership_
cusolver handle ownership
Definition: stream_gpu-inl.h:59
void CreateBlasHandle()
Destroy the original blas handle and create a new one.
Definition: stream_gpu-inl.h:130
cudaStream_t stream_
cudaStream
Definition: stream_gpu-inl.h:45
cublasHandle_t blas_handle_
cublas handle
Definition: stream_gpu-inl.h:47
void DestroyDnnHandle()
Definition: stream_gpu-inl.h:177
void Wait(void)
wait for all the computation associated with this stream to complete
Definition: stream_gpu-inl.h:80
static cublasHandle_t GetBlasHandle(Stream< gpu > *stream)
return actual cublasHandle
Definition: stream_gpu-inl.h:112
overloaded + operator between half_t and bf16_t
Definition: base.h:327
void CreateDnnHandle()
Definition: stream_gpu-inl.h:187
void DestroyBlasHandle()
Destroy the cublas handle if this stream owns it.
Definition: stream_gpu-inl.h:122
cusolverDnHandle_t solver_handle_
cusolver handle
Definition: stream_gpu-inl.h:50
#define MSHADOW_USE_CUDNN
use CUDNN support, must ensure that the cudnn include path is correct
Definition: base.h:122
void CreateSolverHandle()
Definition: stream_gpu-inl.h:156
void DeleteStream< gpu >(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:201
computation stream structure, used for asynchronous computations
Definition: tensor.h:384