mxnet
Namespaces | Functions | Variables
mxnet::common::cuda::rtc Namespace Reference

Namespaces

 util
 

Functions

template<typename Params >
void VectorizedKernelRTCLauncher (const std::string &parameters, const std::string &kernel_name, const std::string &code, int nvec, const index_t lead_dim, const index_t other_dim, mshadow::Stream< gpu > *s, const Params params, const std::vector< TBlob > &inputs, const std::vector< TBlob > &outputs, const int dev_id, const int lead_input_num=0, const index_t blocks=0)
 Launcher helper for the kernels using vectorization. More...
 
int GetMaxSupportedArch ()
 
CUfunction get_function (const std::string &parameters, const std::string &kernel_name, const std::string &code, int dev_id)
 Compile and get the GPU kernel. Uses cache in order to eliminate the overhead of compilation. More...
 
void launch (CUfunction function, const dim3 grid_dim, const dim3 block_dim, unsigned int shared_mem_bytes, mshadow::Stream< gpu > *stream, std::vector< const void * > *args)
 Launch a GPU kernel. More...
 

Variables

const char backward_function_definitions []
 
const char grad_function_definitions []
 
const char function_definitions_util []
 
const char function_definitions_binary []
 
const char function_definitions_unary []
 
const char fp16_support_string []
 
const char reducer []
 
const char logic_reducer []
 
const char special_functions_definitions []
 
const char type_support_string []
 
const char util_string []
 
const char limits []
 
const char vectorization_support_string []
 
std::mutex lock
 

Function Documentation

◆ get_function()

CUfunction mxnet::common::cuda::rtc::get_function ( const std::string &  parameters,
const std::string &  kernel_name,
const std::string &  code,
int  dev_id 
)

Compile and get the GPU kernel. Uses cache in order to eliminate the overhead of compilation.

Parameters
parameters: of the kernel (e.g. values of the template arguments, types used)
kernel_name: name of the kernel
code: used for compilation of the kernel if not found in cache
dev_id: id of the device which the kernel will be launched on

◆ GetMaxSupportedArch()

int mxnet::common::cuda::rtc::GetMaxSupportedArch ( )

◆ launch()

void mxnet::common::cuda::rtc::launch ( CUfunction  function,
const dim3  grid_dim,
const dim3  block_dim,
unsigned int  shared_mem_bytes,
mshadow::Stream< gpu > *  stream,
std::vector< const void * > *  args 
)

Launch a GPU kernel.

Parameters
function: to launch
grid_dim: grid dimensions
block_dim: block dimensions
shared_mem_bytes: amount of dynamic shared memory needed by the kernel
stream: used for launching the kernel
args: arguments of the kernel

◆ VectorizedKernelRTCLauncher()

template<typename Params >
void mxnet::common::cuda::rtc::VectorizedKernelRTCLauncher ( const std::string &  parameters,
const std::string &  kernel_name,
const std::string &  code,
int  nvec,
const index_t  lead_dim,
const index_t  other_dim,
mshadow::Stream< gpu > *  s,
const Params  params,
const std::vector< TBlob > &  inputs,
const std::vector< TBlob > &  outputs,
const int  dev_id,
const int  lead_input_num = 0,
const index_t  blocks = 0 
)

Launcher helper for the kernels using vectorization.

Parameters
parameters: of the kernel (e.g. values of the template arguments)
kernel_name: name of the kernel
code: used for compilation of the kernel if not found in cache
nvec: length of the vector used for loading/storing data
lead_dim: size of leading dimension of the tensors
other_dim: maximum of the total size of all the other dimensions of the tensors
s: stream used to launch the kernel
inputs: to the kernel
outputs: of the kernel
dev_id: id of the device which the kernel will be launched on
lead_input_num: number of the input to use for checking alignment (in case only a subset of the inputs is vectorized). Default is 0.
blocks: if provided and not 0, will launch the specified number of thread blocks. Default is 0.

Variable Documentation

◆ backward_function_definitions

const char mxnet::common::cuda::rtc::backward_function_definitions[]

◆ fp16_support_string

const char mxnet::common::cuda::rtc::fp16_support_string[]

◆ function_definitions_binary

const char mxnet::common::cuda::rtc::function_definitions_binary[]

◆ function_definitions_unary

const char mxnet::common::cuda::rtc::function_definitions_unary[]

◆ function_definitions_util

const char mxnet::common::cuda::rtc::function_definitions_util[]

◆ grad_function_definitions

const char mxnet::common::cuda::rtc::grad_function_definitions[]

◆ limits

const char mxnet::common::cuda::rtc::limits[]

◆ lock

std::mutex mxnet::common::cuda::rtc::lock

◆ logic_reducer

const char mxnet::common::cuda::rtc::logic_reducer[]

◆ reducer

const char mxnet::common::cuda::rtc::reducer[]

◆ special_functions_definitions

const char mxnet::common::cuda::rtc::special_functions_definitions[]

◆ type_support_string

const char mxnet::common::cuda::rtc::type_support_string[]

◆ util_string

const char mxnet::common::cuda::rtc::util_string[]

◆ vectorization_support_string

const char mxnet::common::cuda::rtc::vectorization_support_string[]