mxnet
cuda_utils.h File Reference

CUDA debugging utilities. More...

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <dmlc/optional.h>
#include <mshadow/base.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <curand.h>
[Include dependency graph for cuda_utils.h]

Go to the source code of this file.

Namespaces

 mxnet
 namespace of mxnet
 
 mxnet::common
 
 mxnet::common::cuda
 common utils for cuda
 

Macros

#define CHECK_CUDA_ERROR(msg)
 Check CUDA error. More...
 
#define CUDA_CALL(func)
 Protected CUDA call. More...
 
#define CUBLAS_CALL(func)
 Protected cuBLAS call. More...
 
#define CUSOLVER_CALL(func)
 Protected cuSolver call. More...
 
#define CURAND_CALL(func)
 Protected cuRAND call. More...
 
#define CUDA_UNROLL   _Pragma("unroll")
 
#define CUDA_NOUNROLL   _Pragma("nounroll")
 
#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT   true
 

Functions

const char * mxnet::common::cuda::CublasGetErrorString (cublasStatus_t error)
 Get string representation of cuBLAS errors. More...
 
const char * mxnet::common::cuda::CusolverGetErrorString (cusolverStatus_t error)
 Get string representation of cuSOLVER errors. More...
 
const char * mxnet::common::cuda::CurandGetErrorString (curandStatus_t status)
 Get string representation of cuRAND errors. More...
 
template<typename DType >
DType __device__ mxnet::common::cuda::CudaMax (DType a, DType b)
 
template<typename DType >
DType __device__ mxnet::common::cuda::CudaMin (DType a, DType b)
 
int ComputeCapabilityMajor (int device_id)
 Determine the major version number of the GPU's CUDA compute architecture. More...
 
int ComputeCapabilityMinor (int device_id)
 Determine the minor version number of the GPU's CUDA compute architecture. More...
 
int SMArch (int device_id)
 Return the integer SM architecture (e.g. Volta = 70). More...
 
bool SupportsFloat16Compute (int device_id)
 Determine whether a CUDA-capable GPU's architecture supports float16 math. More...
 
bool SupportsTensorCore (int device_id)
 Determine whether a CUDA-capable GPU's architecture supports Tensor Core math. More...
 
bool GetEnvAllowTensorCore ()
 Returns the global policy for TensorCore algorithm use. More...
 

Detailed Description

CUDA debugging utilities.

Macro Definition Documentation

#define CHECK_CUDA_ERROR (   msg)
Value:
{ \
cudaError_t e = cudaGetLastError(); \
CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \
}

Check CUDA error.

Parameters
msg  Message to print if an error occurred.
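
A typical use is to validate the most recent kernel launch, since launches themselves return no status. The sketch below is illustrative only; the kernel and launch configuration are hypothetical:

// Hypothetical kernel, for illustration only.
__global__ void scale_kernel(float* data, float alpha, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= alpha;
}

void ScaleOnGpu(float* d_data, float alpha, int n) {
  const int threads = 256;
  const int blocks = (n + threads - 1) / threads;
  scale_kernel<<<blocks, threads>>>(d_data, alpha, n);
  // Kernel launches are asynchronous and return no error code themselves,
  // so query the last error and fail with a contextual message.
  CHECK_CUDA_ERROR("scale_kernel launch failed");
}
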
#define CUBLAS_CALL (   func)
Value:
{ \
cublasStatus_t e = (func); \
CHECK_EQ(e, CUBLAS_STATUS_SUCCESS) \
<< "cuBLAS: " << common::cuda::CublasGetErrorString(e); \
}

Protected cuBLAS call.

Parameters
func  Expression to call.

It checks for cuBLAS errors after invocation of the expression.
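
For example, a cuBLAS GEMM can be wrapped directly; this is a minimal sketch that assumes the handle and the device buffers d_A, d_B, d_C are already set up (column-major, as cuBLAS expects):

void GemmExample(cublasHandle_t handle,
                 const float* d_A, const float* d_B, float* d_C,
                 int m, int n, int k) {
  const float alpha = 1.0f, beta = 0.0f;
  // Any non-success cublasStatus_t aborts with a decoded error string.
  CUBLAS_CALL(cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                          m, n, k,
                          &alpha, d_A, m, d_B, k,
                          &beta, d_C, m));
}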

#define CUDA_CALL (   func)
Value:
{ \
cudaError_t e = (func); \
CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \
<< "CUDA: " << cudaGetErrorString(e); \
}

Protected CUDA call.

Parameters
func  Expression to call.

It checks for CUDA errors after invocation of the expression.
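
For example, runtime API calls such as memory copies can be wrapped one by one; a minimal sketch, assuming host and device buffers of n floats are already allocated:

void CopyToDevice(const float* host, float* device, size_t n) {
  // Each runtime call is checked; the macro tolerates cudaErrorCudartUnloading
  // so that calls issued during process teardown do not abort.
  CUDA_CALL(cudaMemcpy(device, host, n * sizeof(float), cudaMemcpyHostToDevice));
  CUDA_CALL(cudaDeviceSynchronize());
}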

#define CUDA_NOUNROLL   _Pragma("nounroll")
#define CUDA_UNROLL   _Pragma("unroll")
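
These two macros expand to compiler pragmas that request or suppress unrolling of the loop that follows, in device code. A hypothetical kernel using CUDA_UNROLL (the kernel and its fixed per-thread trip count are illustrative assumptions):

template <int kUnroll>
__global__ void axpy_kernel(const float* x, float* y, float alpha, int n) {
  int base = (blockIdx.x * blockDim.x + threadIdx.x) * kUnroll;
  CUDA_UNROLL
  for (int j = 0; j < kUnroll; ++j) {  // compile-time trip count, so unrolling is cheap
    int i = base + j;
    if (i < n) y[i] += alpha * x[i];
  }
}
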
#define CURAND_CALL (   func)
Value:
{ \
curandStatus_t e = (func); \
CHECK_EQ(e, CURAND_STATUS_SUCCESS) \
<< "cuRAND: " << common::cuda::CurandGetErrorString(e); \
}

Protected cuRAND call.

Parameters
func  Expression to call.

It checks for cuRAND errors after invocation of the expression.
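
For example, host-side generator setup and sampling can be wrapped call by call; a minimal sketch, assuming d_out points to device memory holding at least n floats:

void FillUniform(float* d_out, size_t n, unsigned long long seed) {
  curandGenerator_t gen;
  CURAND_CALL(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT));
  CURAND_CALL(curandSetPseudoRandomGeneratorSeed(gen, seed));
  CURAND_CALL(curandGenerateUniform(gen, d_out, n));
  CURAND_CALL(curandDestroyGenerator(gen));
}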

#define CUSOLVER_CALL (   func)
Value:
{ \
cusolverStatus_t e = (func); \
CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS) \
<< "cuSolver: " << common::cuda::CusolverGetErrorString(e); \
}

Protected cuSolver call.

Parameters
func  Expression to call.

It checks for cuSolver errors after invocation of the expression.
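
For example, a cuSolver workspace query can be wrapped directly; a minimal sketch, assuming the handle and the n-by-n device matrix d_A are already set up:

void QueryPotrfWorkspace(cusolverDnHandle_t handle, float* d_A, int n, int* lwork) {
  // Workspace-size query for a Cholesky factorization; any failure aborts
  // with the decoded cusolverStatus_t string.
  CUSOLVER_CALL(cusolverDnSpotrf_bufferSize(handle, CUBLAS_FILL_MODE_LOWER,
                                            n, d_A, n, lwork));
}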

#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT   true
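
This constant plausibly acts as the fallback for GetEnvAllowTensorCore() when no environment override is set. A hedged sketch of that pattern, assuming the override is read through dmlc::GetEnv from an environment variable named MXNET_CUDA_ALLOW_TENSOR_CORE (both details are assumptions, not stated on this page):

// Sketch only: read a boolean policy from the environment, falling back to
// the compiled-in default when the variable is unset.
inline bool GetEnvAllowTensorCoreSketch() {
  return dmlc::GetEnv("MXNET_CUDA_ALLOW_TENSOR_CORE",
                      static_cast<bool>(MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT));
}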

Function Documentation

int ComputeCapabilityMajor ( int  device_id)
inline

Determine the major version number of the GPU's CUDA compute architecture.

Parameters
device_id  The device index of the CUDA-capable GPU of interest.
Returns
The major version number of the GPU's CUDA compute architecture.
int ComputeCapabilityMinor ( int  device_id)
inline

Determine the minor version number of the GPU's CUDA compute architecture.

Parameters
device_id  The device index of the CUDA-capable GPU of interest.
Returns
The minor version number of the GPU's CUDA compute architecture.
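
Both of these plausibly wrap the CUDA runtime's cudaDeviceGetAttribute; the following is a hedged sketch of such an implementation, not necessarily the exact code in this header:

inline int ComputeCapabilityMajorSketch(int device_id) {
  int major = 0;
  CUDA_CALL(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_id));
  return major;
}

inline int ComputeCapabilityMinorSketch(int device_id) {
  int minor = 0;
  CUDA_CALL(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_id));
  return minor;
}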
bool GetEnvAllowTensorCore ( )
inline

Returns the global policy for TensorCore algorithm use.

Returns
Whether to allow TensorCore algorithms (if not specified by the Operator locally).
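
A typical caller combines this policy with a hardware check before enabling Tensor Core math on a cuBLAS handle; a sketch, with the handle and device id supplied by the caller:

void ConfigureMathMode(cublasHandle_t handle, int device_id) {
  // Use Tensor Core algorithms only when the hardware supports them and the
  // global policy allows it.
  if (SupportsTensorCore(device_id) && GetEnvAllowTensorCore()) {
    CUBLAS_CALL(cublasSetMathMode(handle, CUBLAS_TENSOR_OP_MATH));
  } else {
    CUBLAS_CALL(cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH));
  }
}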
int SMArch ( int  device_id)
inline

Return the integer SM architecture (e.g. Volta = 70).

Parameters
device_id  The device index of the CUDA-capable GPU of interest.
Returns
The GPU's CUDA compute architecture as an int.
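
Given the example in the brief (Volta = 70), this is plausibly the major and minor compute capability packed into a single integer; a hedged sketch:

inline int SMArchSketch(int device_id) {
  // e.g. compute capability 7.0 (Volta) -> 70, 6.1 (Pascal) -> 61
  return 10 * ComputeCapabilityMajor(device_id) + ComputeCapabilityMinor(device_id);
}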
bool SupportsFloat16Compute ( int  device_id)
inline

Determine whether a CUDA-capable GPU's architecture supports float16 math.

Parameters
device_id  The device index of the CUDA-capable GPU of interest.
Returns
Whether the GPU's architecture supports float16 math.
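
Native float16 arithmetic is generally available from compute capability 5.3 onward, so a plausible check (a sketch, not necessarily the exact rule used here) is:

inline bool SupportsFloat16ComputeSketch(int device_id) {
  // __half arithmetic first became available with sm_53.
  return SMArch(device_id) >= 53;
}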
bool SupportsTensorCore ( int  device_id)
inline

Determine whether a CUDA-capable GPU's architecture supports Tensor Core math.

Parameters
device_id  The device index of the CUDA-capable GPU of interest.
Returns
Whether the GPU's architecture supports Tensor Core math.
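
Tensor Cores were introduced with the Volta architecture (compute capability 7.0), so a plausible check (again only a sketch) is:

inline bool SupportsTensorCoreSketch(int device_id) {
  // Tensor Core units first appeared with Volta (SM 70).
  return SMArch(device_id) >= 70;
}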