45 #if DMLC_USE_CXX11 && defined(__GNUC__) && !defined(__clang_version__) 46 #if __GNUC__ == 4 && __GNUC_MINOR__ < 8 47 #error "Currently we need g++ 4.8 or higher to fully support c++11 features" 58 #define MXNET_API __declspec(dllexport) 60 #define MXNET_API __declspec(dllimport) 69 #ifndef MXNET_PREDICT_ONLY 70 #define MXNET_PREDICT_ONLY 0 80 #define MXNET_VERSION (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH) 82 #define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + patch) 86 #define PROFILER_MESSAGE_FUNCNAME (__FUNCTION__) 151 return !(*
this == b);
158 strm->
Write(&dev_type,
sizeof(dev_type));
159 strm->
Write(&dev_id,
sizeof(dev_id));
167 if (strm->
Read(&dev_type,
sizeof(dev_type)) !=
sizeof(
dev_type))
return false;
168 if (strm->
Read(&dev_id,
sizeof(int32_t)) !=
sizeof(int32_t))
return false;
182 inline static Context CPU(int32_t dev_id = 0);
188 inline static Context GPU(int32_t dev_id = -1);
233 static void CudaLibChecks();
236 static void CuDNNLibChecks();
249 primary_stream_(primary_stream),
250 aux_stream_(primary_stream),
251 gpu_stream_sync_event_(nullptr) {
254 bool primary_has_blas_handle =
256 bool primary_has_dnn_handle =
259 primary_has_dnn_handle,
261 MSHADOW_CUDA_CALL(cudaEventCreateWithFlags(&gpu_stream_sync_event_, cudaEventDisableTiming));
267 if (aux_stream_ != primary_stream_) {
277 if (aux_stream_ != primary_stream_)
278 StreamSync(primary_stream_, aux_stream_, gpu_stream_sync_event_);
285 if (aux_stream_ != primary_stream_)
286 StreamSync(aux_stream_, primary_stream_, gpu_stream_sync_event_);
304 cudaEvent_t gpu_stream_sync_event_;
322 gpu_aux_stream_->PreAuxStreamUseSync();
326 gpu_aux_stream_->PostAuxStreamUseSync();
344 #endif // MXNET_USE_CUDA 370 template<
typename xpu>
404 if (dev_type &
kGPU) {
413 CHECK_EQ(cudaGetDevice(&ctx.
dev_id), cudaSuccess);
415 LOG(FATAL) <<
"Please compile with CUDA enabled for cuda features";
439 int cuda_driver_version = 0;
440 CHECK_EQ(cudaDriverGetVersion(&cuda_driver_version), cudaSuccess);
441 return cuda_driver_version > 0;
453 cudaError_t e = cudaGetDeviceCount(&count);
456 if (e == cudaErrorNoDevice || e == cudaErrorInsufficientDriver) {
459 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
468 const int32_t default_num_streams = 1;
470 static int32_t num_streams =
471 dmlc::GetEnv(
"MXNET_GPU_WORKER_NSTREAMS", default_num_streams) >= 2 ? 2 : 1;
476 uint64_t *total_mem) {
483 e = cudaGetDevice(&curDevice);
484 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
486 e = cudaSetDevice(dev);
487 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
489 e = cudaMemGetInfo(&memF, &memT);
490 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
492 e = cudaSetDevice(curDevice);
493 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
495 *free_mem =
static_cast<uint64_t
>(memF);
496 *total_mem =
static_cast<uint64_t
>(memT);
500 <<
"This call is only supported for MXNet built with CUDA support.";
507 const std::string::size_type l = str.find(
'(');
508 CHECK_NE(l, std::string::npos);
509 const std::string::size_type r = str.find(
')');
510 CHECK_EQ(r, str.length()-1);
512 const std::string type = str.substr(0, l);
513 int id = std::stoi(str.substr(l+1, r-l-1));
516 }
else if (type ==
"gpu") {
518 }
else if (type ==
"cpu_pinned") {
520 }
else if (type ==
"cpu_shared") {
523 LOG(FATAL) <<
"Invalid context string " << str;
526 LOG(FATAL) <<
"Invalid context string " << str;
537 out <<
"cpu_pinned(";
539 out <<
"cpu_shared(";
548 #define STRINGIZE_DETAIL(x) #x 549 #define STRINGIZE(x) STRINGIZE_DETAIL(x) 550 #define MXNET_DESCRIBE(...) describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__)) 551 #define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__) 554 #if MXNET_USE_MKLDNN == 1 555 constexpr
size_t kMKLDNNAlign = 64;
561 template<>
struct hash<mxnet::
Context> {
570 #if __cplusplus < 201402L && !defined(_MSC_VER) 571 template<
typename T,
typename... Args>
572 inline std::unique_ptr<T> make_unique(Args&&... args) {
573 return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
580 #endif // MXNET_BASE_H_ DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:120
static const int32_t kMaxDevID
the maximal device index
Definition: base.h:174
bool is_bulk
indicator of whether this execution is run in bulk mode
Definition: base.h:364
static cudaStream_t GetStream(Stream< gpu > *stream)
returns actual cudaStream_t given an input GPU stream pointer
Definition: stream_gpu-inl.h:79
HandleState dnn_handle_ownership_
cudnn handle ownership
Definition: stream_gpu-inl.h:42
namespace of mxnet
Definition: base.h:89
static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total)
get the free and total available memory on a GPU
SyncedGPUAuxStream(GPUAuxStream *gpu_aux_stream)
constructor.
Definition: base.h:321
Definition: stream_gpu-inl.h:19
mshadow::Stream< xpu > * get_stream() const
get mshadow stream from Context
Definition: base.h:371
void PostAuxStreamUseSync()
Makes future primary stream work wait on the completion of existing aux stream work.
Definition: base.h:283
static int32_t GetGPUCount()
bool Load(dmlc::Stream *strm)
load the content from binary stream
Definition: base.h:166
mshadow::default_real_t real_t
data type that will be used to store ndarray
Definition: base.h:97
static Context GPU(int32_t dev_id=-1)
Definition: optional.h:241
GPUAuxStream(mshadow::Stream< gpu > *primary_stream)
constructor.
Definition: base.h:248
Context ctx
base Context
Definition: base.h:352
bool operator<(const Context &b) const
Comparator, used to enable Context as std::map key.
static const int32_t kMaxDevType
the maximal device type
Definition: base.h:172
Execution-time context: the information needed at runtime for actual execution.
Definition: base.h:350
interface of stream I/O for serialization
Definition: io.h:30
Holds an auxiliary mshadow gpu stream that can be synced with a primary stream.
Definition: base.h:242
mshadow::Stream< gpu > * GetStream()
Getter for created auxiliary stream.
Definition: base.h:289
Stream< gpu > * NewStream< gpu >(bool create_blas_handle, bool create_dnn_handle, int dev_id)
Definition: stream_gpu-inl.h:192
Provides automatic coordination of an auxiliary stream with a primary one. This object, upon construction, prepares an aux stream for use by syncing it with enqueued primary-stream work. Object destruction will sync again so future primary-stream work will wait on enqueued aux-stream work. If MXNET_GPU_WORKER_NSTREAMS == 1, then this defaults simply: the primary stream will equal the aux stream and the syncs will be executed as nops. See ./src/operator/cudnn/cudnn_convolution-inl.h for a usage example.
Definition: base.h:315
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:252
void * aux_stream
the auxiliary stream of the device, can be NULL or Stream<gpu>* in GPU mode
Definition: base.h:360
~GPUAuxStream()
destructor
Definition: base.h:265
DeviceType dev_type
the device type we run the op on
Definition: base.h:111
static bool GPUDriverPresent()
device name CPU
Definition: tensor.h:21
device name GPU
Definition: tensor.h:28
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:32
HandleState blas_handle_ownership_
cublas handle ownership
Definition: stream_gpu-inl.h:38
static int32_t GetGPUStreamsPerWorker()
size_t HashCombine(size_t key, const T &value)
hashes an object and combines the key with previous keys
Definition: common.h:37
int dev_id
dev id
Definition: stream_gpu-inl.h:46
int32_t dev_id
device id we are going to run it on
Definition: base.h:113
#define MSHADOW_CATCH_ERROR(func)
Run function and catch error, log unknown error.
Definition: base.h:266
int32_t index_t
type that will be used for index
Definition: base.h:291
mshadow::Stream< gpu > * GetStream() const
Getter for underlying mshadow::Stream<gpu>.
Definition: base.h:337
void * stream
the stream of the device, can be NULL or Stream<gpu>* in GPU mode
Definition: base.h:356
void Save(dmlc::Stream *strm) const
save the content into binary stream
Definition: base.h:157
mshadow::gpu gpu
mxnet gpu
Definition: base.h:93
float default_real_t
floating-point type that mshadow uses by default
Definition: base.h:303
const Context & get_ctx() const
get the base Context from RunContext
Definition: base.h:384
DeviceType
Type of device.
Definition: base.h:104
static Context CPUShared(int32_t dev_id=0)
cudaStream_t stream_
cudaStream
Definition: stream_gpu-inl.h:26
mshadow::cpu cpu
mxnet cpu
Definition: base.h:91
virtual size_t Read(void *ptr, size_t size)=0
reads data from a stream
~SyncedGPUAuxStream()
destructor
Definition: base.h:325
int real_dev_id() const
Returns dev_id for kGPU and kCPUPinned, 0 otherwise.
Definition: base.h:127
nnvm::Op Op
operator structure from NNVM
Definition: base.h:99
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:25
Context()
default constructor
Definition: base.h:115
static Context Create(DeviceType dev_type, int32_t dev_id=-1)
Create a new context.
bool operator!=(const Context &b) const
check if current context not equals another one
Definition: base.h:150
Data structure Tuple and TShape to store dynamic sized shapes.
static Context CPU(int32_t dev_id=0)
virtual void Write(const void *ptr, size_t size)=0
writes data to a stream
SyncedGPUAuxStream get_gpu_aux_stream() const
get an RAII object that transparently handles the syncing of the auxiliary stream.
Definition: base.h:379
static Context CPUPinned(int32_t dev_id=-1)
Operator information structure.
void PreAuxStreamUseSync()
Makes future aux stream work wait on the completion of existing primary stream work.
Definition: base.h:275
static Context FromString(const std::string &str)
static void StreamSync(mshadow::Stream< gpu > *s1, mshadow::Stream< gpu > *s2, cudaEvent_t event)
Make future work enqueued to s2 wait on completion of current work enqueued to s1.
Definition: base.h:296
mshadow::index_t index_t
index type usually use unsigned
Definition: base.h:95
TBlob class that holds a common representation of an arbitrary-dimension tensor, can be used to transform...
Symbolic graph construction API.
std::ostream & operator<<(std::ostream &os, const optional< T > &t)
serialize an optional object to string.
Definition: optional.h:141
Context information about the execution environment.
Definition: base.h:102
Provide lightweight util to do parameter setup and checking.
type traits information header
Operator structure.
Definition: op.h:104
void DeleteStream< gpu >(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:182
get features of the MXNet library at runtime
bool operator==(const Context &b) const
check if current context equals another one
Definition: base.h:142
computation stream structure, used for asynchronous computations
Definition: tensor.h:365