Go to the documentation of this file.
44 #define MXNET_API __declspec(dllexport)
46 #define MXNET_API __declspec(dllimport)
55 #ifndef MXNET_PREDICT_ONLY
56 #define MXNET_PREDICT_ONLY 0
66 #define MXNET_VERSION (MXNET_MAJOR * 10000 + MXNET_MINOR * 100 + MXNET_PATCH)
68 #define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + patch)
72 #define PROFILER_MESSAGE_FUNCNAME (__FUNCTION__)
141 return !(*
this == b);
159 if (strm->
Read(&
dev_id,
sizeof(int32_t)) !=
sizeof(int32_t))
225 static void CudaLibChecks();
228 static void CuDNNLibChecks();
241 : primary_stream_(primary_stream),
242 aux_stream_(primary_stream),
243 gpu_stream_sync_event_(nullptr) {
246 bool primary_has_blas_handle =
248 bool primary_has_dnn_handle =
251 primary_has_blas_handle, primary_has_dnn_handle, primary_stream->
dev_id);
252 MSHADOW_CUDA_CALL(cudaEventCreateWithFlags(&gpu_stream_sync_event_, cudaEventDisableTiming));
258 if (aux_stream_ != primary_stream_) {
268 if (aux_stream_ != primary_stream_)
269 StreamSync(primary_stream_, aux_stream_, gpu_stream_sync_event_);
276 if (aux_stream_ != primary_stream_)
277 StreamSync(aux_stream_, primary_stream_, gpu_stream_sync_event_);
297 cudaEvent_t gpu_stream_sync_event_;
337 #endif // MXNET_USE_CUDA
363 template <
typename xpu>
406 CHECK_EQ(cudaGetDevice(&ctx.dev_id), cudaSuccess);
408 LOG(FATAL) <<
"Please compile with CUDA enabled for cuda features";
432 int cuda_driver_version = 0;
433 CHECK_EQ(cudaDriverGetVersion(&cuda_driver_version), cudaSuccess);
434 return cuda_driver_version > 0;
446 cudaError_t e = cudaGetDeviceCount(&count);
449 if (e == cudaErrorNoDevice || e == cudaErrorInsufficientDriver) {
452 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
461 const int32_t default_num_streams = 1;
463 static int32_t num_streams =
464 dmlc::GetEnv(
"MXNET_GPU_WORKER_NSTREAMS", default_num_streams) >= 2 ? 2 : 1;
475 e = cudaGetDevice(&curDevice);
476 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
478 e = cudaSetDevice(dev);
479 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
481 e = cudaMemGetInfo(&memF, &memT);
482 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
484 e = cudaSetDevice(curDevice);
485 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
487 *free_mem =
static_cast<uint64_t
>(memF);
488 *total_mem =
static_cast<uint64_t
>(memT);
491 LOG(FATAL) <<
"This call is only supported for MXNet built with CUDA support.";
498 const std::string::size_type l = str.find(
'(');
499 CHECK_NE(l, std::string::npos);
500 const std::string::size_type r = str.find(
')');
501 CHECK_EQ(r, str.length() - 1);
503 const std::string type = str.substr(0, l);
504 int id = std::stoi(str.substr(l + 1, r - l - 1));
507 }
else if (type ==
"gpu") {
509 }
else if (type ==
"cpu_pinned") {
511 }
else if (type ==
"cpu_shared") {
514 LOG(FATAL) <<
"Invalid context string " << str;
517 LOG(FATAL) <<
"Invalid context string " << str;
522 inline std::ostream&
operator<<(std::ostream& out,
const Context& ctx) {
528 out <<
"cpu_pinned(";
530 out <<
"cpu_shared(";
534 out << ctx.dev_id <<
")";
539 #define STRINGIZE_DETAIL(x) #x
540 #define STRINGIZE(x) STRINGIZE_DETAIL(x)
541 #define MXNET_DESCRIBE(...) describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__))
542 #define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__)
544 #if MXNET_USE_ONEDNN == 1 || MXNET_USE_INTGEMM == 1
545 constexpr
size_t kDNNLAlign = 64;
552 struct hash<
mxnet::Context> {
561 #if __cplusplus < 201402L && !defined(_MSC_VER)
562 template <
typename T,
typename... Args>
563 inline std::unique_ptr<T> make_unique(Args&&... args) {
564 return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
571 #endif // MXNET_BASE_H_
namespace of mxnet
Definition: api_registry.h:33
@ kCPU
Definition: base.h:93
void operator=(const SyncedGPUAuxStream &)=delete
copy assignment operator deleted to prevent unexpected synchronizations.
computation stream structure, used for asynchronous computations
Definition: tensor.h:488
Provides automatic coordination of an auxiliary stream with a primary one. This object,...
Definition: base.h:308
void DeleteStream< gpu >(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:255
static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total)
get the free and total available memory on a GPU
~GPUAuxStream()
destructor
Definition: base.h:256
static const int32_t kMaxDevType
the maximal device type
Definition: base.h:164
get features of the MXNet library at runtime
TBlob class that holds common representation of arbitrary dimension tensor, can be used to transformed...
cudaStream_t stream_
cudaStream
Definition: stream_gpu-inl.h:44
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:264
Provide lightweight util to do parameter setup and checking.
DeviceType dev_type
the device type we run the op on
Definition: base.h:99
static int32_t GetGPUCount()
Operator information structure.
defines configuration macros
Holds an auxiliary mshadow gpu stream that can be synced with a primary stream.
Definition: base.h:234
bool operator==(const Context &b) const
check if current context equals another one
Definition: base.h:132
execution time context. The information needed in runtime for actual execution.
Definition: base.h:343
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:43
int dev_id
dev id
Definition: stream_gpu-inl.h:71
static Context GPU(int32_t dev_id=-1)
static int32_t GetGPUStreamsPerWorker()
virtual void Write(const void *ptr, size_t size)=0
writes data to a stream
mshadow::Stream< gpu > * GetStream() const
Getter for underlying mshadow::Stream<gpu>.
Definition: base.h:330
bool Load(dmlc::Stream *strm)
load the content from binary stream
Definition: base.h:156
@ kCPUShared
Definition: base.h:96
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:108
device name GPU
Definition: tensor.h:46
device name CPU
Definition: tensor.h:39
size_t HashCombine(size_t key, const T &value)
hash an object and combines the key with previous keys
Definition: common.h:37
bool operator<(const Context &b) const
Comparator, used to enable Context as std::map key.
header file of tensor data structure and functions This lib requires explicit memory allocation and d...
mshadow::lapack_index_t lapack_index_t
index type for blas library.
Definition: base.h:83
DeviceType
Type of device.
Definition: base.h:92
@ kCPUPinned
Definition: base.h:95
static bool GPUDriverPresent()
static Context CPUShared(int32_t dev_id=0)
Definition: stream_gpu-inl.h:37
Stream< gpu > * NewStream< gpu >(bool create_blas_handle, bool create_dnn_handle, int dev_id)
Definition: stream_gpu-inl.h:266
~SyncedGPUAuxStream()
destructor
Definition: base.h:318
std::ostream & operator<<(std::ostream &os, const optional< T > &t)
serialize an optional object to string.
Definition: optional.h:151
Symbolic graph construction API.
static void StreamSync(mshadow::Stream< gpu > *s1, mshadow::Stream< gpu > *s2, cudaEvent_t event)
Make future work enqueued to s2 wait on completion of current work enqueued to s1.
Definition: base.h:289
int32_t index_t
type that will be used for index
Definition: base.h:328
GPUAuxStream(mshadow::Stream< gpu > *primary_stream)
constructor.
Definition: base.h:240
HandleState blas_handle_ownership_
cublas handle ownership
Definition: stream_gpu-inl.h:60
mshadow::Stream< xpu > * get_stream() const
get mshadow stream from Context
Definition: base.h:364
int32_t dev_id
device id we are going to run it on
Definition: base.h:101
int lapack_index_t
Definition: base.h:344
@ kGPU
Definition: base.h:94
defines serializable interface of dmlc
static Context CPUPinned(int32_t dev_id=-1)
#define MSHADOW_CATCH_ERROR(func)
Run function and catch error, log unknown error.
Definition: base.h:278
mshadow::Stream< gpu > * GetStream()
Getter for created auxiliary stream.
Definition: base.h:280
Context information about the execution environment.
Definition: base.h:90
virtual size_t Read(void *ptr, size_t size)=0
reads data from a stream
mshadow::cpu cpu
mxnet cpu
Definition: base.h:77
float default_real_t
floating-point type that will be used by default by mshadow
Definition: base.h:348
void Save(dmlc::Stream *strm) const
save the content into binary stream
Definition: base.h:147
HandleState dnn_handle_ownership_
cudnn handle ownership
Definition: stream_gpu-inl.h:64
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:81
int real_dev_id() const
Returns dev_id for kGPU and kCPUPinned, 0 otherwise.
Definition: base.h:116
Definition: optional.h:251
type traits information header
SyncedGPUAuxStream get_gpu_aux_stream() const
get an RAII object that transparently handles the syncing of the auxiliary stream.
Definition: base.h:372
static Context FromString(const std::string &str)
static Context CPU(int32_t dev_id=0)
void * stream
the stream of the device, can be nullptr or Stream<gpu>* in GPU mode
Definition: base.h:349
Context ctx
base Context
Definition: base.h:345
bool operator!=(const Context &b) const
check if current context does not equal another one
Definition: base.h:140
static Context Create(DeviceType dev_type, int32_t dev_id=-1)
Create a new context.
Data structure Tuple and TShape to store dynamic sized shapes.
Context()
default constructor
Definition: base.h:103
void PreAuxStreamUseSync()
Makes future aux stream work wait on the completion of existing primary stream work.
Definition: base.h:266
mshadow::gpu gpu
mxnet gpu
Definition: base.h:79
void PostAuxStreamUseSync()
Makes future primary stream work wait on the completion of existing aux stream work.
Definition: base.h:274
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:50
const Context & get_ctx() const
get the base Context from RunContext
Definition: base.h:377
nnvm::Op Op
operator structure from NNVM
Definition: base.h:87
void * aux_stream
the auxiliary stream of the device, can be nullptr or Stream<gpu>* in GPU mode
Definition: base.h:353
Operator structure.
Definition: op.h:105
interface of stream I/O for serialization
Definition: io.h:30
static const int32_t kMaxDevID
the maximal device index
Definition: base.h:166
mshadow::default_real_t real_t
data type that will be used to store ndarray
Definition: base.h:85
void * event_pool
pointer to the cuda event pool used by the dependency engine
Definition: base.h:357
DeviceType
Definition: ndarray.h:40
SyncedGPUAuxStream(GPUAuxStream *gpu_aux_stream)
constructor.
Definition: base.h:314