mxnet
stream_gpu-inl.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
26 #ifndef MSHADOW_STREAM_GPU_INL_H_
27 #define MSHADOW_STREAM_GPU_INL_H_
28 #include <memory>
29 #include "./base.h"
30 #include "./tensor.h"
31 #include "./logging.h"
32 
33 namespace mshadow {
34 #if MSHADOW_USE_CUDA == 1
35 // Stream allocation
36 // actual implementation of GPU stream in CUDA
37 template<>
38 struct Stream<gpu> {
40  enum HandleState {
41  NoHandle = 0,
42  OwnHandle = 1,
43  };
45  cudaStream_t stream_;
47  cublasHandle_t blas_handle_;
49  #if MSHADOW_USE_CUSOLVER == 1
50  cusolverDnHandle_t solver_handle_;
51  #endif
52 
53  #if MSHADOW_USE_CUDNN == 1
54  cudnnHandle_t dnn_handle_;
55  #endif
56 
63  cudaDeviceProp prop;
65  int dev_id;
66 
67  Stream(void)
68  : stream_(0)
69  , blas_handle_(0)
70 #if MSHADOW_USE_CUDNN == 1
71  , dnn_handle_(0)
72 #endif
73  , blas_handle_ownership_(NoHandle)
74  , solver_handle_ownership_(NoHandle)
75  , dnn_handle_ownership_(NoHandle) {}
80  inline void Wait(void) {
81  MSHADOW_CUDA_CALL(cudaStreamSynchronize(stream_));
82  }
87  inline bool CheckIdle(void) {
88  cudaError_t err = cudaStreamQuery(stream_);
89  if (err == cudaSuccess) return true;
90  if (err == cudaErrorNotReady) return false;
91  LOG(FATAL) << cudaGetErrorString(err);
92  return false;
93  }
98  inline static cudaStream_t GetStream(Stream<gpu> *stream) {
99  if (stream == NULL) {
100 #if MSHADOW_FORCE_STREAM
101  LOG(FATAL) << "Default GPU stream was used when MSHADOW_FORCE_STREAM was on";
102 #endif
103  return 0;
104  } else {
105  return stream->stream_;
106  }
107  }
112  inline static cublasHandle_t GetBlasHandle(Stream<gpu> *stream) {
113  if (stream == NULL) {
114  return 0;
115  } else {
116  CHECK_NE(stream->blas_handle_ownership_, NoHandle)
117  << "No handle exist in source stream";
118  return stream->blas_handle_;
119  }
120  }
122  inline void DestroyBlasHandle() {
123  if (blas_handle_ownership_ == OwnHandle) {
124  cublasStatus_t err = cublasDestroy(blas_handle_);
125  blas_handle_ownership_ = NoHandle;
126  CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Destory cublas handle failed";
127  }
128  }
130  inline void CreateBlasHandle() {
131  this->DestroyBlasHandle();
132  cublasStatus_t err = cublasCreate(&blas_handle_);
133  blas_handle_ownership_ = OwnHandle;
134  CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Create cublas handle failed";
135  err = cublasSetStream(blas_handle_, stream_);
136  CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Setting cublas stream failed";
137  }
138 #if MSHADOW_USE_CUSOLVER == 1
139  inline static cusolverDnHandle_t GetSolverHandle(Stream<gpu> *stream) {
140  if (stream == NULL) {
141  return 0;
142  } else {
143  CHECK_NE(stream->solver_handle_ownership_, NoHandle) << "No handle exist in source stream";
144  return stream->solver_handle_;
145  }
146  }
147 #endif
148  inline void DestroySolverHandle() {
149 #if MSHADOW_USE_CUSOLVER == 1
150  if (solver_handle_ownership_ == OwnHandle) {
151  cusolverStatus_t err = cusolverDnDestroy(solver_handle_);
152  CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Destory cusolver handle failed";
153  }
154 #endif
155  }
156  inline void CreateSolverHandle() {
157 #if MSHADOW_USE_CUSOLVER == 1
158  this->DestroySolverHandle();
159  cusolverStatus_t err = cusolverDnCreate(&solver_handle_);
160  CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Create cusolver handle failed";
161  err = cusolverDnSetStream(solver_handle_, stream_);
162  CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Setting cusolver stream failed";
163  this->solver_handle_ownership_ = OwnHandle;
164 #endif
165  }
166 // #if MSHADOW_USE_CUDNN && defined(__CUDACC__)
167 #if MSHADOW_USE_CUDNN == 1
168  inline static cudnnHandle_t GetDnnHandle(Stream<gpu> *stream) {
169  if (stream == NULL) {
170  return 0;
171  } else {
172  CHECK_NE(stream->dnn_handle_ownership_, NoHandle) << "No handle exist in source stream";
173  return stream->dnn_handle_;
174  }
175  }
176 #endif
177  inline void DestroyDnnHandle() {
178 // #if MSHADOW_USE_CUDNN && defined(__CUDACC__)
179 #if MSHADOW_USE_CUDNN == 1
180  if (dnn_handle_ownership_ == OwnHandle) {
181  cudnnStatus_t err = cudnnDestroy(dnn_handle_);
182  this->dnn_handle_ownership_ = NoHandle;
183  CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
184  }
185 #endif
186  }
187  inline void CreateDnnHandle() {
188 // #if MSHADOW_USE_CUDNN == 1 && defined(__CUDACC__)
189 #if MSHADOW_USE_CUDNN == 1
190  this->DestroyDnnHandle();
191  cudnnStatus_t err = cudnnCreate(&dnn_handle_);
192  CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
193  // At this point, we have the resource which may need to be freed
194  this->dnn_handle_ownership_ = OwnHandle;
195  err = cudnnSetStream(dnn_handle_, stream_);
196  CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
197 #endif
198  }
199 };
200 template<>
201 inline void DeleteStream<gpu>(Stream<gpu> *stream) {
202  if (stream) {
203  MSHADOW_CUDA_CALL(cudaStreamDestroy(stream->stream_));
204  stream->DestroyBlasHandle();
205  stream->DestroySolverHandle();
206  stream->DestroyDnnHandle();
207  delete stream;
208  }
209 }
210 template<>
211 inline Stream<gpu> *NewStream<gpu>(bool create_blas_handle,
212  bool create_dnn_handle,
213  int dev_id) {
214  // RAII on Cuda exception
215  struct StreamDeleter { void operator()(Stream<gpu> *ptr) const { DeleteStream<gpu>(ptr); } };
216  std::unique_ptr<Stream<gpu>, StreamDeleter> st(new Stream<gpu>());
217  MSHADOW_CUDA_CALL(cudaStreamCreate(&st->stream_));
218  if (create_blas_handle) {
219  st->CreateBlasHandle();
220  st->CreateSolverHandle();
221  }
222  if (create_dnn_handle) {
223  st->CreateDnnHandle();
224  }
225  st->dev_id = dev_id;
226  if (dev_id != -1) {
227  MSHADOW_CUDA_CALL(cudaGetDeviceProperties(&st->prop, dev_id));
228  }
229  return st.release();
230 }
231 #endif
232 } // namespace mshadow
233 #endif // MSHADOW_STREAM_GPU_INL_H_
static cudaStream_t GetStream(Stream< gpu > *stream)
returns actual cudaStream_t given an input GPU stream pointer
Definition: stream_gpu-inl.h:98
HandleState dnn_handle_ownership_
cudnn handle ownership
Definition: stream_gpu-inl.h:61
Definition: stream_gpu-inl.h:38
bool CheckIdle(void)
query whether the stream is idle
Definition: stream_gpu-inl.h:87
static cusolverDnHandle_t GetSolverHandle(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:139
HandleState
handle state
Definition: stream_gpu-inl.h:40
Stream(void)
Definition: stream_gpu-inl.h:67
Stream< gpu > * NewStream< gpu >(bool create_blas_handle, bool create_dnn_handle, int dev_id)
Definition: stream_gpu-inl.h:211
void DestroySolverHandle()
Definition: stream_gpu-inl.h:148
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:271
cudaDeviceProp prop
cudaDeviceProp
Definition: stream_gpu-inl.h:63
header file of tensor data structure and functions This lib requires explicit memory allocation and d...
device name GPU
Definition: tensor.h:47
HandleState blas_handle_ownership_
cublas handle ownership
Definition: stream_gpu-inl.h:57
int dev_id
dev id
Definition: stream_gpu-inl.h:65
HandleState solver_handle_ownership_
cusolver handle ownership
Definition: stream_gpu-inl.h:59
void CreateBlasHandle()
Destroy the original blas handle and create a new one.
Definition: stream_gpu-inl.h:130
cudaStream_t stream_
cudaStream
Definition: stream_gpu-inl.h:45
cublasHandle_t blas_handle_
cublas handle
Definition: stream_gpu-inl.h:47
void DestroyDnnHandle()
Definition: stream_gpu-inl.h:177
void Wait(void)
wait for all the computation associated with this stream to complete
Definition: stream_gpu-inl.h:80
static cublasHandle_t GetBlasHandle(Stream< gpu > *stream)
return actual cublasHandle
Definition: stream_gpu-inl.h:112
overloaded + operator between half_t and bf16_t
Definition: base.h:327
void CreateDnnHandle()
Definition: stream_gpu-inl.h:187
void DestroyBlasHandle()
Destroy the cublas handle if this stream owns it.
Definition: stream_gpu-inl.h:122
cusolverDnHandle_t solver_handle_
cusolver handle
Definition: stream_gpu-inl.h:50
#define MSHADOW_USE_CUDNN
use CUDNN support, must ensure that the cudnn include path is correct
Definition: base.h:122
void CreateSolverHandle()
Definition: stream_gpu-inl.h:156
void DeleteStream< gpu >(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:201
computation stream structure, used for asynchronous computations
Definition: tensor.h:384