docs/api/include_2mxnet_2base_8h_source.html

/*

 * Licensed to the Apache Software Foundation (ASF) under one

 * or more contributor license agreements.  See the NOTICE file

 * distributed with this work for additional information

 * regarding copyright ownership.  The ASF licenses this file

 * to you under the Apache License, Version 2.0 (the

 * "License"); you may not use this file except in compliance

 * with the License.  You may obtain a copy of the License at

 *

 *   http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing,

 * software distributed under the License is distributed on an

 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

 * KIND, either express or implied.  See the License for the

 * specific language governing permissions and limitations

 * under the License.

 */


#ifndef MXNET_BASE_H_

#define MXNET_BASE_H_


#include "dmlc/base.h"

#include <string>

#include "dmlc/io.h"

#include "dmlc/type_traits.h"

#include "dmlc/parameter.h"

#include "mshadow/tensor.h"

// nnvm headers for symbolic construction.

#include "nnvm/op.h"

#include "nnvm/symbolic.h"

#include "libinfo.h"

#include "tuple.h"


// MXNET_API decorates symbols that belong to the public library interface.
// With MSVC it expands to __declspec(dllexport) when MXNET_EXPORTS is defined
// and to __declspec(dllimport) otherwise; with every other compiler it
// expands to nothing.
#ifdef _MSC_VER

#ifdef MXNET_EXPORTS

#define MXNET_API __declspec(dllexport)

#else

#define MXNET_API __declspec(dllimport)

#endif

#else

#define MXNET_API

#endif


// MXNET_PREDICT_ONLY defaults to 0 unless the build has already defined it.
// NOTE(review): presumably a non-zero value selects a prediction-only build;
// nothing in this part of the header reads the macro, so confirm at its users.
#ifndef MXNET_PREDICT_ONLY

#define MXNET_PREDICT_ONLY 0

#endif


// Major component of the library version.
#define MXNET_MAJOR 2


// Minor component of the library version.
#define MXNET_MINOR 0


// Patch component of the library version.
#define MXNET_PATCH 0


// The three components packed into one integer:
// major * 10000 + minor * 100 + patch (2.0.0 -> 20000).
#define MXNET_VERSION (MXNET_MAJOR * 10000 + MXNET_MINOR * 100 + MXNET_PATCH)


// Packs (major, minor, patch) into a single integer using the same layout as
// MXNET_VERSION: major * 10000 + minor * 100 + patch.
// Every argument is parenthesized so that an argument containing a
// low-precedence operator (e.g. `cond ? 3 : 4`) is evaluated as one operand.
#define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + (patch))


// Expands to the compiler-provided __FUNCTION__ identifier, i.e. the name of
// the function in which the macro is used.
#define PROFILER_MESSAGE_FUNCNAME (__FUNCTION__)


namespace mxnet {

/*! \brief mxnet-level alias of the mshadow CPU device tag */
using cpu = mshadow::cpu;
/*! \brief mxnet-level alias of the mshadow GPU device tag */
using gpu = mshadow::gpu;
/*! \brief mxnet-level alias of mshadow's index type */
using index_t = mshadow::index_t;
/*! \brief mxnet-level alias of mshadow's LAPACK index type */
using lapack_index_t = mshadow::lapack_index_t;
/*! \brief mxnet-level alias of mshadow's default real type */
using real_t = mshadow::default_real_t;
/*! \brief mxnet-level alias of the nnvm operator type */
using Op = nnvm::Op;


/*!
 * \brief Device descriptor: a device type paired with a device id.
 *
 * Contexts are equality comparable, ordered (operator<) and can be written to
 * and read back from a dmlc::Stream as two raw fields.
 */
struct Context {
  /*! \brief Device categories a context can name. */
  enum DeviceType {
    kCPU       = cpu::kDevMask,
    kGPU       = gpu::kDevMask,
    kCPUPinned = 3,
    kCPUShared = 5,
  };
  /*! \brief category of the device */
  DeviceType dev_type;
  /*! \brief index of the device */
  int32_t dev_id;
  /*! \brief Default-constructs the context cpu(0). */
  Context() : dev_type(kCPU), dev_id(0) {}
  /*!
   * \brief Device mask of this context.
   * \return kCPU for the kCPUPinned and kCPUShared types, dev_type otherwise
   */
  inline DeviceType dev_mask() const {
    const bool cpu_variant = (dev_type == kCPUPinned) || (dev_type == kCPUShared);
    return cpu_variant ? kCPU : dev_type;
  }
  /*!
   * \brief Device id that is meaningful for this context.
   * \return dev_id for kGPU and kCPUPinned contexts, 0 for every other type
   */
  inline int real_dev_id() const {
    const bool id_is_used = (dev_type == kCPUPinned) || (dev_type == kGPU);
    return id_is_used ? dev_id : 0;
  }
  /*!
   * \brief Strict ordering: by dev_type first, then by dev_id.
   * \param b context to compare with
   */
  inline bool operator<(const Context& b) const;
  /*!
   * \brief Two contexts are equal when both dev_type and dev_id match.
   * \param b context to compare with
   */
  inline bool operator==(const Context& b) const {
    return (dev_type == b.dev_type) && (dev_id == b.dev_id);
  }
  /*!
   * \brief Negation of operator==.
   * \param b context to compare with
   */
  inline bool operator!=(const Context& b) const {
    return !operator==(b);
  }
  /*!
   * \brief Writes the raw bytes of dev_type followed by those of dev_id.
   * \param strm destination stream
   */
  inline void Save(dmlc::Stream* strm) const {
    strm->Write(&dev_type, sizeof(dev_type));
    strm->Write(&dev_id, sizeof(dev_id));
  }
  /*!
   * \brief Reads dev_type and then dev_id back in the layout written by Save.
   * \param strm source stream
   * \return false as soon as one of the two reads comes up short (dev_id is
   *         not read when dev_type could not be), true otherwise
   */
  inline bool Load(dmlc::Stream* strm) {
    // && short-circuits, so the second read only happens after a full first read.
    return strm->Read(&dev_type, sizeof(dev_type)) == sizeof(dev_type) &&
           strm->Read(&dev_id, sizeof(int32_t)) == sizeof(int32_t);
  }
  /*! \brief NOTE(review): presumably an upper bound on device type values; not used in the code visible here. */
  static const int32_t kMaxDevType = 6;
  /*! \brief NOTE(review): presumably an upper bound on device ids; not used in the code visible here. */
  static const int32_t kMaxDevID = 16;
  /*!
   * \brief Builds a context of the given type.
   * \param dev_type device category
   * \param dev_id device index; a negative value is replaced by 0, or by the
   *        current CUDA device when the type has the kGPU bit set
   */
  inline static Context Create(DeviceType dev_type, int32_t dev_id = -1);
  /*! \brief Shorthand for Create(kCPU, dev_id). */
  inline static Context CPU(int32_t dev_id = 0);
  /*! \brief Shorthand for Create(kGPU, dev_id). */
  inline static Context GPU(int32_t dev_id = -1);
  /*! \brief Number of GPUs reported by the CUDA runtime; 0 in builds without CUDA. */
  inline static int32_t GetGPUCount();
  /*! \brief Whether a CUDA driver is reported; always false in builds without CUDA. */
  inline static bool GPUDriverPresent();
  /*! \brief Number of streams per GPU worker (1 or 2), from MXNET_GPU_WORKER_NSTREAMS. */
  inline static int32_t GetGPUStreamsPerWorker();
  /*!
   * \brief Queries free and total memory of a GPU.
   * \param dev device id to query
   * \param free receives the free amount
   * \param total receives the total amount
   */
  inline static void GetGPUMemoryInformation(int dev, uint64_t* free, uint64_t* total);
  /*! \brief Shorthand for Create(kCPUPinned, dev_id). */
  inline static Context CPUPinned(int32_t dev_id = -1);
  /*! \brief Shorthand for Create(kCPUShared, dev_id). */
  inline static Context CPUShared(int32_t dev_id = 0);
  /*!
   * \brief Parses the textual form "<type>(<id>)".
   * \param str text such as "cpu(0)"; accepted type names are cpu, gpu,
   *        cpu_pinned and cpu_shared
   */
  inline static Context FromString(const std::string& str);

 private:
#if MXNET_USE_CUDA
  // Declared only in CUDA builds; invoked by Create for types with the kGPU bit.
  static void CudaLibChecks();
#endif
#if MXNET_USE_CUDNN
  // Declared only in cuDNN builds; invoked by Create for types with the kGPU bit.
  static void CuDNNLibChecks();
#endif
};


#if MXNET_USE_CUDA


/*!
 * \brief Holds an auxiliary GPU stream that accompanies a primary stream.
 *
 * When Context::GetGPUStreamsPerWorker() is below 2 no extra stream is
 * created and the "auxiliary" stream is simply the primary one; all
 * synchronization helpers then do nothing.
 *
 * NOTE(review): copy operations are implicitly generated. Copying an instance
 * that created its own stream would presumably release that stream and event
 * twice in the destructors - confirm that no caller copies this class.
 */
class GPUAuxStream {
 public:
  /*!
   * \brief Sets up the auxiliary stream for a primary stream.
   * \param primary_stream the stream the auxiliary stream is paired with
   */
  explicit GPUAuxStream(mshadow::Stream<gpu>* primary_stream)
      : primary_stream_(primary_stream),
        aux_stream_(primary_stream),
        gpu_stream_sync_event_(nullptr) {
    // Single-stream configuration: keep aliasing the primary stream.
    if (Context::GetGPUStreamsPerWorker() < 2)
      return;
    // Mirror the primary stream's handle ownership and device on the new stream.
    const bool with_blas_handle =
        primary_stream->blas_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle;
    const bool with_dnn_handle =
        primary_stream->dnn_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle;
    aux_stream_ =
        mshadow::NewStream<gpu>(with_blas_handle, with_dnn_handle, primary_stream->dev_id);
    MSHADOW_CUDA_CALL(cudaEventCreateWithFlags(&gpu_stream_sync_event_, cudaEventDisableTiming));
  }
  /*! \brief Releases the stream and event, but only if the constructor created them. */
  ~GPUAuxStream() {
    if (!HasSeparateStream())
      return;
    MSHADOW_CATCH_ERROR(mshadow::DeleteStream<gpu>(aux_stream_));
    MSHADOW_CATCH_ERROR(cudaEventDestroy(gpu_stream_sync_event_));
  }
  /*! \brief Makes the auxiliary stream wait for work already queued on the primary stream. */
  void PreAuxStreamUseSync() {
    if (HasSeparateStream())
      StreamSync(primary_stream_, aux_stream_, gpu_stream_sync_event_);
  }
  /*! \brief Makes the primary stream wait for work already queued on the auxiliary stream. */
  void PostAuxStreamUseSync() {
    if (HasSeparateStream())
      StreamSync(aux_stream_, primary_stream_, gpu_stream_sync_event_);
  }
  /*! \brief Returns the auxiliary stream (the primary stream when none was created). */
  mshadow::Stream<gpu>* GetStream() {
    return aux_stream_;
  }
  /*!
   * \brief Records an event on one stream and makes another stream wait for it.
   * \param s1 stream on which the event is recorded
   * \param s2 stream that waits for the event
   * \param event event used for the hand-off
   */
  static void StreamSync(mshadow::Stream<gpu>* s1, mshadow::Stream<gpu>* s2, cudaEvent_t event) {
    MSHADOW_CUDA_CALL(cudaEventRecord(event, s1->stream_));
    MSHADOW_CUDA_CALL(cudaStreamWaitEvent(s2->stream_, event, 0));
  }

 private:
  /*! \brief True when the constructor created a stream distinct from the primary one. */
  bool HasSeparateStream() const {
    return aux_stream_ != primary_stream_;
  }

  mshadow::Stream<gpu>* primary_stream_;
  mshadow::Stream<gpu>* aux_stream_;
  cudaEvent_t gpu_stream_sync_event_;
};


class SyncedGPUAuxStream {

 public:

  explicit SyncedGPUAuxStream(GPUAuxStream* gpu_aux_stream) : gpu_aux_stream_(gpu_aux_stream) {

    gpu_aux_stream_->PreAuxStreamUseSync();

  }

  ~SyncedGPUAuxStream() {

    gpu_aux_stream_->PostAuxStreamUseSync();

  }

  SyncedGPUAuxStream(const SyncedGPUAuxStream&) = delete;

  void operator=(const SyncedGPUAuxStream&) = delete;

  SyncedGPUAuxStream(SyncedGPUAuxStream&&) = default;

  SyncedGPUAuxStream& operator=(SyncedGPUAuxStream&&) = default;

  inline mshadow::Stream<gpu>* GetStream() const {

    return gpu_aux_stream_->GetStream();

  }


 private:

  GPUAuxStream* gpu_aux_stream_;

};

#endif  // MXNET_USE_CUDA


/*!
 * \brief Execution context: the device context plus untyped handles to the
 *  streams and event pool that accompany it.
 *
 * All handles start out as nullptr, so a default-initialized RunContext never
 * exposes an indeterminate pointer.
 */
struct RunContext {
  /*! \brief device context */
  Context ctx;
  /*! \brief untyped stream handle; get_stream() casts it to mshadow::Stream<xpu>* */
  void* stream = nullptr;
  /*! \brief untyped auxiliary stream handle; get_gpu_aux_stream() casts it to GPUAuxStream* */
  void* aux_stream = nullptr;
  /*! \brief untyped event pool handle; not used by the code in this struct */
  void* event_pool = nullptr;
  /*!
   * \brief Returns the stream handle typed for the requested device.
   * \tparam xpu device tag of the stream
   * \return `stream` cast to mshadow::Stream<xpu>*; no check is made that the
   *         handle really is of that type
   */
  template <typename xpu>
  inline mshadow::Stream<xpu>* get_stream() const {
    return static_cast<mshadow::Stream<xpu>*>(stream);
  }
#if MXNET_USE_CUDA
  /*!
   * \brief Returns a guard that synchronizes the auxiliary stream for the
   *  duration of its lifetime.
   *
   * The guard's constructor dereferences `aux_stream`, so the handle must
   * point to a GPUAuxStream when this is called.
   */
  inline SyncedGPUAuxStream get_gpu_aux_stream() const {
    return SyncedGPUAuxStream(static_cast<GPUAuxStream*>(aux_stream));
  }
#endif
  /*! \brief Returns the device context. */
  inline const Context& get_ctx() const {
    return ctx;
  }
};

}  // namespace mxnet


namespace mxnet {

// implementing Context

/*!
 * \brief Orders contexts by device type first and by device id second.
 * \param b right-hand side of the comparison
 * \return true when this context sorts strictly before b
 */
inline bool Context::operator<(const Context& b) const {
  // Different device types decide the order on their own.
  if (dev_type != b.dev_type)
    return dev_type < b.dev_type;
  return dev_id < b.dev_id;
}

/*!
 * \brief Builds a context for the given device type and id.
 *
 * A negative dev_id is first replaced by 0. For device types that have the
 * kGPU bit set, the CUDA/cuDNN library checks are run (when compiled in) and
 * a negative dev_id is resolved to the current CUDA device instead; in a
 * build without CUDA that case is a fatal error.
 *
 * \param dev_type device category
 * \param dev_id device index, negative meaning "unspecified"
 * \return the resulting context
 */
inline Context Context::Create(DeviceType dev_type, int32_t dev_id) {
  const bool id_unspecified = dev_id < 0;
  Context ctx;
  ctx.dev_type = dev_type;
  ctx.dev_id   = id_unspecified ? 0 : dev_id;
  // Types without the kGPU bit need nothing beyond the two fields.
  if (!(dev_type & kGPU))
    return ctx;
#if MXNET_USE_CUDA
  CudaLibChecks();
#endif
#if MXNET_USE_CUDNN
  CuDNNLibChecks();
#endif
  if (id_unspecified) {
#if MXNET_USE_CUDA
    CHECK_EQ(cudaGetDevice(&ctx.dev_id), cudaSuccess);
#else
    LOG(FATAL) << "Please compile with CUDA enabled for cuda features";
#endif
  }
  return ctx;
}

/*!
 * \brief Builds a kCPU context.
 * \param dev_id device index, forwarded to Create
 */
inline Context Context::CPU(int32_t dev_id) {
  return Context::Create(Context::kCPU, dev_id);
}


/*!
 * \brief Builds a kCPUPinned context.
 * \param dev_id device index, forwarded to Create
 */
inline Context Context::CPUPinned(int32_t dev_id) {
  return Context::Create(Context::kCPUPinned, dev_id);
}


/*!
 * \brief Builds a kCPUShared context.
 * \param dev_id device index, forwarded to Create
 */
inline Context Context::CPUShared(int32_t dev_id) {
  return Context::Create(Context::kCPUShared, dev_id);
}


/*!
 * \brief Builds a kGPU context.
 * \param dev_id device index, forwarded to Create
 */
inline Context Context::GPU(int32_t dev_id) {
  return Context::Create(Context::kGPU, dev_id);
}


/*!
 * \brief Tells whether a CUDA driver is available.
 * \return true when cudaDriverGetVersion reports a version above 0; always
 *         false in a build without CUDA. A failing query is a fatal error.
 */
inline bool Context::GPUDriverPresent() {
#if !MXNET_USE_CUDA
  return false;
#else
  int cuda_driver_version = 0;
  CHECK_EQ(cudaDriverGetVersion(&cuda_driver_version), cudaSuccess);
  const bool driver_found = cuda_driver_version > 0;
  return driver_found;
#endif
}


/*!
 * \brief Counts the GPUs visible to the CUDA runtime.
 * \return 0 in a build without CUDA, when no driver is present, or when the
 *         runtime reports "no device" / "insufficient driver"; otherwise the
 *         count from cudaGetDeviceCount. Any other CUDA error is fatal.
 */
inline int32_t Context::GetGPUCount() {
#if !MXNET_USE_CUDA
  return 0;
#else
  if (!GPUDriverPresent())
    return 0;
  int32_t count;
  const cudaError_t e = cudaGetDeviceCount(&count);
  // TODO(junwu): Remove e == cudaErrorInsufficientDriver
  // This is skipped for working around wheel build system with older CUDA driver.
  const bool no_usable_device = (e == cudaErrorNoDevice) || (e == cudaErrorInsufficientDriver);
  if (no_usable_device)
    return 0;
  CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
  return count;
#endif
}


/*!
 * \brief Number of GPU streams available to each worker.
 *
 * Derived from the MXNET_GPU_WORKER_NSTREAMS environment variable the first
 * time the function runs and cached in a function-local static afterwards.
 *
 * \return 2 when the variable is set to 2 or more, otherwise 1
 */
inline int32_t Context::GetGPUStreamsPerWorker() {
  static int32_t num_streams = []() -> int32_t {
    // Value assumed when the user has not set MXNET_GPU_WORKER_NSTREAMS.
    const int32_t default_num_streams = 1;
    const int32_t requested = dmlc::GetEnv("MXNET_GPU_WORKER_NSTREAMS", default_num_streams);
    // At most one auxiliary stream is offered on top of the standard one.
    return requested >= 2 ? 2 : 1;
  }();
  return num_streams;
}


/*!
 * \brief Queries the free and total memory of a GPU.
 *
 * Temporarily makes `dev` the current CUDA device, queries cudaMemGetInfo and
 * switches back to the device that was current on entry. The switch back is
 * issued before the query result is checked, so a failing query does not
 * leave the calling thread on device `dev`.
 *
 * \param dev id of the device to query
 * \param free_mem receives the free amount reported by cudaMemGetInfo
 * \param total_mem receives the total amount reported by cudaMemGetInfo
 *
 * Any CUDA error is fatal, as is calling this in a build without CUDA.
 */
inline void Context::GetGPUMemoryInformation(int dev, uint64_t* free_mem, uint64_t* total_mem) {
#if MXNET_USE_CUDA

  size_t memF, memT;
  cudaError_t e;

  // Remember the caller's device so it can be restored afterwards.
  int curDevice;
  e = cudaGetDevice(&curDevice);
  CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);

  e = cudaSetDevice(dev);
  CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);

  e = cudaMemGetInfo(&memF, &memT);
  // Restore first, check second: a failed query must not strand the thread on `dev`.
  const cudaError_t restore_e = cudaSetDevice(curDevice);
  CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
  CHECK_EQ(restore_e, cudaSuccess) << " CUDA: " << cudaGetErrorString(restore_e);

  *free_mem  = static_cast<uint64_t>(memF);
  *total_mem = static_cast<uint64_t>(memT);

#else
  LOG(FATAL) << "This call is only supported for MXNet built with CUDA support.";
#endif
}


/*!
 * \brief Parses a context from its textual form "<type>(<id>)".
 *
 * The text must contain a '(' and its first ')' must be the last character.
 * The part before '(' selects the factory (cpu, gpu, cpu_pinned or
 * cpu_shared); the part between the parentheses is converted with std::stoi.
 * Every failure, including any exception raised on the way, ends in the
 * fatal error "Invalid context string <str>".
 *
 * \param str text to parse, e.g. "gpu(1)"
 * \return the parsed context
 */
inline Context Context::FromString(const std::string& str) {
  // Maps each accepted type name to the factory that builds its context.
  struct Factory {
    const char* name;
    Context (*make)(int32_t);
  };
  const Factory factories[] = {
      {"cpu", &Context::CPU},
      {"gpu", &Context::GPU},
      {"cpu_pinned", &Context::CPUPinned},
      {"cpu_shared", &Context::CPUShared},
  };

  Context ret;
  try {
    const std::string::size_type l = str.find('(');
    CHECK_NE(l, std::string::npos);
    const std::string::size_type r = str.find(')');
    CHECK_EQ(r, str.length() - 1);

    const std::string type = str.substr(0, l);
    const int id           = std::stoi(str.substr(l + 1, r - l - 1));

    bool recognized = false;
    for (const Factory& candidate : factories) {
      if (type == candidate.name) {
        ret        = candidate.make(id);
        recognized = true;
        break;
      }
    }
    if (!recognized) {
      LOG(FATAL) << "Invalid context string " << str;
    }
  } catch (...) {
    LOG(FATAL) << "Invalid context string " << str;
  }
  return ret;
}


inline std::ostream& operator<<(std::ostream& out, const Context& ctx) {

  if (ctx.dev_type == Context::kCPU) {

    out << "cpu(";

  } else if (ctx.dev_type == Context::kGPU) {

    out << "gpu(";

  } else if (ctx.dev_type == Context::kCPUPinned) {

    out << "cpu_pinned(";

  } else if (ctx.dev_type == Context::kCPUShared) {

    out << "cpu_shared(";

  } else {

    out << "unknown(";

  }

  out << ctx.dev_id << ")";

  return out;

}


// describe op registration point

// Turns its argument into a string literal exactly as written (no macro
// expansion of the argument takes place).
#define STRINGIZE_DETAIL(x) #x

// Two-step stringification: the argument is macro-expanded first, so
// STRINGIZE(__LINE__) yields the line number as a string literal.
#define STRINGIZE(x)        STRINGIZE_DETAIL(x)

// Expands to a describe(...) call whose string argument has
// "\n\nFrom:<file>:<line>" of the use site appended through string-literal
// concatenation; the arguments must therefore end in a string literal.
#define MXNET_DESCRIBE(...) describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__))

// String literal "\n\nDefined in <file>:L<line>" for the use site.
#define ADD_FILELINE        "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__)


// Only defined when the build enables oneDNN or intgemm.
// NOTE(review): presumably the byte alignment required for buffers handed to
// those libraries - confirm at the places that use the constant.
#if MXNET_USE_ONEDNN == 1 || MXNET_USE_INTGEMM == 1

constexpr size_t kDNNLAlign = 64;

#endif


}  // namespace mxnet


namespace std {

/*!
 * \brief std::hash specialization so that mxnet::Context can be used as a key
 *  in unordered containers.
 */
template <>
struct hash<mxnet::Context> {
  /*!
   * \brief Hashes a context.
   * \param ctx context to hash
   * \return dev_type and then dev_id folded into a zero seed with dmlc::HashCombine
   */
  size_t operator()(const mxnet::Context& ctx) const {
    const size_t seed      = 0;
    const size_t with_type = dmlc::HashCombine(seed, static_cast<size_t>(ctx.dev_type));
    return dmlc::HashCombine(with_type, static_cast<size_t>(ctx.dev_id));
  }
};


#if __cplusplus < 201402L && !defined(_MSC_VER)

/*!
 * \brief Back-port of std::make_unique for pre-C++14 compilers.
 * \tparam T type of the object to create
 * \tparam Args types of the constructor arguments
 * \param args arguments perfectly forwarded to T's constructor
 * \return a unique_ptr owning the newly created T
 */
template <typename T, typename... Args>
inline std::unique_ptr<T> make_unique(Args&&... args) {
  T* const created = new T(std::forward<Args>(args)...);
  return std::unique_ptr<T>(created);
}

#endif

}  // namespace std


#include "./tensor_blob.h"

#endif  // MXNET_BASE_H_