Go to the documentation of this file.
26 #ifndef MXNET_COMMON_CUDA_RTC_H_
27 #define MXNET_COMMON_CUDA_RTC_H_
35 #include <cuda_runtime_api.h>
57 extern std::mutex
lock;
67 const std::string& kernel_name,
68 const std::string& code,
79 void launch(CUfunction
function,
82 unsigned int shared_mem_bytes,
84 std::vector<const void*>* args);
91 #endif // MXNET_USE_CUDA
93 #endif // MXNET_COMMON_CUDA_RTC_H_
namespace of mxnet
Definition: api_registry.h:33
OpReqType
operation request type to Forward and Backward
Definition: op_attr_types.h:45
std::string to_string(OpReqType req)
Convert OpReqType to string.
int GetMaxSupportedArch()
Definition: stream_gpu-inl.h:37
CUfunction get_function(const std::string &parameters, const std::string &kernel_name, const std::string &code, int dev_id)
Compile and get the GPU kernel. Uses cache in order to eliminate the overhead of compilation.
Additional operator attributes besides the ones provided by NNVM.
void launch(CUfunction function, const dim3 grid_dim, const dim3 block_dim, unsigned int shared_mem_bytes, mshadow::Stream< gpu > *stream, std::vector< const void * > *args)
Launch a GPU kernel.
configuration of MXNet as well as basic data structure.