docs/api/engine_8h_source.html

/*

 * Licensed to the Apache Software Foundation (ASF) under one

 * or more contributor license agreements.  See the NOTICE file

 * distributed with this work for additional information

 * regarding copyright ownership.  The ASF licenses this file

 * to you under the Apache License, Version 2.0 (the

 * "License"); you may not use this file except in compliance

 * with the License.  You may obtain a copy of the License at

 *

 *   http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing,

 * software distributed under the License is distributed on an

 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

 * KIND, either express or implied.  See the License for the

 * specific language governing permissions and limitations

 * under the License.

 */


#ifndef MXNET_ENGINE_H_

#define MXNET_ENGINE_H_


#if DMLC_USE_CXX11

#include <algorithm>

#include <memory>

#include <functional>

#endif

#include <utility>

#include <vector>

#include "./base.h"


namespace mxnet {


// Forward declaration of Engine: the callback classes in namespace engine name it as a friend before it is defined.

class Engine;


namespace engine {

#if MXNET_USE_CUDA

/*!
 * \brief Move-only wrapper around a CUDA event with timing disabled.
 *
 * The event handle is held in a std::shared_ptr; callers only ever receive a
 * std::weak_ptr to it through GetEvent(). The constructor and destructor are
 * defined outside this header.
 */
class CUDAEvent final {
 public:
  /*!
   * \brief Construct the wrapper for the given context (defined out of line).
   * \param ctx context the event belongs to.
   */
  explicit CUDAEvent(Context const& ctx);

  /*!
   * \brief Take over the event handle held by \p other.
   *
   * The handle is moved rather than copied, so no reference-count traffic is
   * generated, and \p other is left with an empty event_. The constructor is
   * noexcept because neither member operation can throw.
   */
  CUDAEvent(CUDAEvent&& other) noexcept
      : event_(std::move(other.event_)), dev_id_(other.dev_id_) {}

  // Copying is disabled: the class is move-only.
  CUDAEvent(const CUDAEvent& other) = delete;
  void operator=(const CUDAEvent& other) = delete;

  // Defined out of line. A moved-from object reaches it with an empty event_.
  ~CUDAEvent();

  /*!
   * \brief Get a non-owning reference to the wrapped event.
   * \return weak_ptr observing event_; it does not keep the event alive.
   */
  inline std::weak_ptr<cudaEvent_t> GetEvent() noexcept {
    return event_;
  }

 private:
  /*! \brief Owning handle of the event; empty after this object was moved from. */
  std::shared_ptr<cudaEvent_t> event_;
  // NOTE(review): presumably the id of the device the event was created on -- confirm in the .cc file.
  int dev_id_;
};


/*!
 * \brief Fixed-size pool of kPoolSize CUDAEvent objects handed out round-robin.
 *
 * A monotonically increasing atomic counter selects the next slot
 * (counter % kPoolSize), so slots are reused once kPoolSize events have been
 * handed out.
 */
class CUDAEventPool final {
 public:
  /*!
   * \brief Create all kPoolSize events up front for the given context.
   * \param ctx context forwarded to every CUDAEvent constructor.
   */
  explicit CUDAEventPool(Context const& ctx) : counter_(0) {
    // The final size is known, so allocate once instead of regrowing the
    // vector (and relocating the events) while it fills up.
    events_.reserve(kPoolSize);
    for (size_t i = 0; i < kPoolSize; ++i) {
      events_.emplace_back(ctx);
    }
  }

  /*!
   * \brief Get the event stored in slot \p i.
   * \param i slot index; must be smaller than kPoolSize.
   * \return weak_ptr to the event in that slot.
   * \note The function is noexcept, so the std::out_of_range thrown by at()
   *       for an invalid index terminates the program.
   */
  inline std::weak_ptr<cudaEvent_t> GetEvent(size_t i) noexcept {
    return events_.at(i).GetEvent();
  }

  /*!
   * \brief Hand out the next event in round-robin order.
   * \return pair of (weak_ptr to the event, counter value before the
   *         increment); the slot used is that counter value modulo kPoolSize.
   */
  inline std::pair<std::weak_ptr<cudaEvent_t>, uint64_t> GetNextEvent() noexcept {
    // Atomic post-increment: each caller observes a distinct counter value.
    uint64_t c = counter_++;
    return {events_.at((c) % kPoolSize).GetEvent(), c};
  }

  /*! \brief Current counter value, i.e. the number of GetNextEvent() calls made so far. */
  inline uint64_t GetCounterValue() noexcept {
    return counter_.load();
  }

 private:
  /*! \brief Number of events owned by the pool. */
  static constexpr size_t kPoolSize = 64;
  /*! \brief The pooled events; holds exactly kPoolSize entries after construction. */
  std::vector<CUDAEvent> events_;
  /*! \brief Number of events handed out by GetNextEvent(). */
  std::atomic<uint64_t> counter_;
};


// One CUDA event entry as stored in SyncObject: a non-owning reference to the
// event together with a stream and a pool index.
struct EventInfo {

  // Weak (non-owning) reference to the event; it does not keep the event alive.
  std::weak_ptr<cudaEvent_t> event;

  // NOTE(review): presumably the stream the event was recorded on -- confirm against the code that fills this struct.
  cudaStream_t stream;

  // NOTE(review): looks like the counter value returned by CUDAEventPool::GetNextEvent() -- confirm against the callers.
  uint64_t pool_index;

};

// CUDA event bookkeeping embedded in every Var (member `sync_object`) in CUDA
// builds: the events of readers and of the writer of that variable.
struct SyncObject {

  // Reader events; this vector may hold several entries at the same time.

  std::vector<EventInfo> reader_events;

  // Writer event; a vector is used, but it is expected to hold at most one entry.

  std::vector<EventInfo> writer_event;

  // NOTE(review): presumably guards reader_events and writer_event -- the locking code is not in this header, confirm.
  std::mutex mutex;

};

#endif


// Base type of an engine variable. The engine API refers to variables through
// VarHandle, which is a pointer to this type.
struct Var {

  // Returns the current value of version_. Virtual, so a derived type can
  // supply its own implementation.
  virtual size_t version() {

    return version_;

  }

  // Virtual destructor: objects are handled through Var pointers.
  virtual ~Var() = default;

  // Declared here, defined elsewhere.
  // NOTE(review): presumably converts this object to the derived type T -- confirm at the definition.
  template <typename T>

  inline T* Cast();

  // Version counter, initialised to 0. Nothing in this header modifies it.
  size_t version_{0};

#if MXNET_USE_CUDA


  // Reader/writer CUDA events recorded for this variable (CUDA builds only).
  SyncObject sync_object;

#endif

};  // struct Var


/*! \brief Operator type; only declared in this header. */
struct Opr;
/*! \brief Handle through which the engine API refers to a variable. */
using VarHandle = Var*;
/*! \brief Handle through which the engine API refers to an operator. */
using OprHandle = Opr*;

class CallbackOnStart {

 public:

  // use implicit copy and assign

  inline void operator()(const dmlc::Error* error = nullptr) const {

    if (callback_ != nullptr)

      (*callback_)(engine_, param_, error);

  }


 private:

  friend class ::mxnet::Engine;

  void (*callback_)(Engine*, void*, const dmlc::Error*);

  Engine* engine_;

  void* param_;

};

class CallbackOnComplete {

 public:

  // use implicit copy and assign

  inline void operator()(const dmlc::Error* error = nullptr) const {

    (*callback_)(engine_, param_, error);

  }


 private:

  friend class ::mxnet::Engine;

  void (*callback_)(Engine*, void*, const dmlc::Error*);

  Engine* engine_;

  void* param_;

};

}  // namespace engine


#if DMLC_USE_CXX11


// Property tag for a function given to the engine: it is the type of the
// `prop` argument of Engine::NewOperator/PushAsync/PushSync, where it defaults
// to kNormal. Enumerators take the implicit values 0..7 in declaration order.
// NOTE(review): the per-value notes below are read off the enumerator names;
// the code that interprets them is not in this header -- confirm against the
// engine implementations.
enum class FnProperty {

  // Default property.
  kNormal,

  // Presumably: a copy whose source is a GPU.
  kCopyFromGPU,

  // Presumably: a copy whose destination is a GPU.
  kCopyToGPU,

  // Presumably: a CPU operation that should be prioritized.
  kCPUPrioritized,

  // Presumably: an asynchronous function call.
  kAsync,

  // Presumably: the operation that deletes a variable.
  kDeleteVar,

  // Presumably: a GPU operation that should be prioritized.
  kGPUPrioritized,

  // Presumably: an operation that must not be skipped.
  kNoSkip

};  // enum class FnProperty


class MXNET_API Engine {

 public:

  typedef engine::CallbackOnStart CallbackOnStart;

  typedef engine::CallbackOnComplete CallbackOnComplete;

  typedef std::function<void(RunContext)> SyncFn;

  typedef std::function<void(RunContext, CallbackOnStart, CallbackOnComplete)> AsyncFn;

  typedef engine::VarHandle VarHandle;

  typedef engine::OprHandle OprHandle;

  virtual void NotifyShutdown() = 0;

  virtual void Stop() {

    LOG(FATAL) << "Engine cannot be stopped";

  }

  virtual void Start() {

    LOG(FATAL) << "Engine cannot be restarted";

  }

  virtual VarHandle NewVariable() = 0;

  virtual OprHandle NewOperator(AsyncFn fn,

                                std::vector<VarHandle> const& const_vars,

                                std::vector<VarHandle> const& mutable_vars,

                                FnProperty prop      = FnProperty::kNormal,

                                const char* opr_name = nullptr,

                                bool wait            = false) = 0;

  virtual void DeleteOperator(OprHandle op) = 0;

  virtual void Push(OprHandle op, Context exec_ctx, int priority = 0, bool profiling = false) = 0;

  virtual void PushAsync(AsyncFn exec_fun,

                         Context exec_ctx,

                         std::vector<VarHandle> const& const_vars,

                         std::vector<VarHandle> const& mutable_vars,

                         FnProperty prop      = FnProperty::kNormal,

                         int priority         = 0,

                         const char* opr_name = nullptr,

                         bool wait            = false) = 0;

  virtual void DeleteVariable(SyncFn delete_fn, Context exec_ctx, VarHandle var) = 0;

  virtual void WaitForVar(VarHandle var) = 0;

  virtual void WaitForAll() = 0;

  virtual void Throw(VarHandle var) = 0;

  virtual ~Engine() noexcept(false) {}

  static Engine* Get();

  static const std::shared_ptr<Engine>& _GetSharedRef();

  virtual void PushSync(SyncFn exec_fn,

                        Context exec_ctx,

                        std::vector<VarHandle> const& const_vars,

                        std::vector<VarHandle> const& mutable_vars,

                        FnProperty prop      = FnProperty::kNormal,

                        int priority         = 0,

                        const char* opr_name = nullptr) {

    this->PushAsync(

        [exec_fn](RunContext ctx, CallbackOnStart on_start, CallbackOnComplete on_complete) {

          on_start();

          exec_fn(ctx);

          on_complete();

        },

        exec_ctx,

        const_vars,

        mutable_vars,

        prop,

        priority,

        opr_name);

  }


  inline CallbackOnStart CreateOnStart(void (*callback)(Engine*, void*, const dmlc::Error*),

                                       void* param) {

    CallbackOnStart ret;

    ret.callback_ = callback;

    ret.engine_   = this;

    ret.param_    = param;

    return ret;

  }


  inline CallbackOnComplete CreateCallback(void (*callback)(Engine*, void*, const dmlc::Error*),

                                           void* param) {

    CallbackOnComplete ret;

    ret.callback_ = callback;

    ret.engine_   = this;

    ret.param_    = param;

    return ret;

  }

  // For each var vector, sort it and remove the duplicated vars.

  // Also remove vars from read_vars if it also appears in write_vars

  inline void DeduplicateVarHandle(std::vector<engine::VarHandle>* read_vars,

                                   std::vector<engine::VarHandle>* write_vars) {

    std::sort(write_vars->begin(), write_vars->end());

    write_vars->resize(std::unique(write_vars->begin(), write_vars->end()) - write_vars->begin());

    std::sort(read_vars->begin(), read_vars->end());

    read_vars->resize(std::unique(read_vars->begin(), read_vars->end()) - read_vars->begin());

    auto wit  = write_vars->begin();

    auto rtop = read_vars->begin();

    for (auto rit = read_vars->begin(); rit != read_vars->end(); ++rit) {

      while (wit != write_vars->end() && *wit < *rit)

        ++wit;

      if (wit == write_vars->end() || *wit != *rit) {

        *rtop = *rit;

        ++rtop;

      }

    }

    read_vars->resize(rtop - read_vars->begin());

  }

  virtual int bulk_size() const {

    return 0;

  }

  virtual int set_bulk_size(int) {

    return 0;

  }

};      // class Engine

#endif  // DMLC_USE_CXX11

}  // namespace mxnet

#endif  // MXNET_ENGINE_H_