Common utilities for CUDA.
More...
|
const char * | CublasGetErrorString (cublasStatus_t error) |
| Get string representation of cuBLAS errors. More...
|
|
const char * | CusolverGetErrorString (cusolverStatus_t error) |
| Get string representation of cuSOLVER errors. More...
|
|
const char * | CurandGetErrorString (curandStatus_t status) |
| Get string representation of cuRAND errors. More...
|
|
template<typename DType > |
DType __device__ | CudaMax (DType a, DType b) |
|
template<typename DType > |
DType __device__ | CudaMin (DType a, DType b) |
|
int | get_load_type (size_t N) |
| Get the largest datatype suitable for reading the requested number of bytes. More...
|
|
int | get_rows_per_block (size_t row_size, int num_threads_per_block) |
| Determine how many rows of a 2D matrix a block of threads should handle, based on the row size and the number of threads in a block. More...
|
|
const char* mxnet::common::cuda::CublasGetErrorString |
( |
cublasStatus_t |
error | ) |
|
|
inline |
Get string representation of cuBLAS errors.
- Parameters
-
error | The cuBLAS status code to convert. |
- Returns
- String representation.
template<typename DType >
DType __device__ mxnet::common::cuda::CudaMax |
( |
DType |
a, |
|
|
DType |
b |
|
) |
| |
|
inline |
template<typename DType >
DType __device__ mxnet::common::cuda::CudaMin |
( |
DType |
a, |
|
|
DType |
b |
|
) |
| |
|
inline |
const char* mxnet::common::cuda::CurandGetErrorString |
( |
curandStatus_t |
status | ) |
|
|
inline |
Get string representation of cuRAND errors.
- Parameters
-
status | The cuRAND status code to convert. |
- Returns
- String representation.
const char* mxnet::common::cuda::CusolverGetErrorString |
( |
cusolverStatus_t |
error | ) |
|
|
inline |
Get string representation of cuSOLVER errors.
- Parameters
-
error | The cuSOLVER status code to convert. |
- Returns
- String representation.
int mxnet::common::cuda::get_load_type |
( |
size_t |
N | ) |
|
Get the largest datatype suitable for reading the requested number of bytes.
- Parameters
-
N | Number of bytes to be read. |
- Returns
- The mshadow representation of the type that could be used for reading.
int mxnet::common::cuda::get_rows_per_block |
( |
size_t |
row_size, |
|
|
int |
num_threads_per_block |
|
) |
| |
Determine how many rows of a 2D matrix a block of threads should handle, based on the row size and the number of threads in a block.
- Parameters
-
row_size | Size of the row expressed in the number of reads required to fully load it. For example, if the row has N elements, but each thread reads 2 elements with a single read, row_size should be N / 2. |
num_threads_per_block | Number of threads in a block. |
- Returns
- the number of rows that should be handled by a single block.