25 #ifndef MXNET_KVSTORE_H_ 26 #define MXNET_KVSTORE_H_ 30 #include <unordered_map> 34 #include "../../src/kvstore/gradient_compression.h" 36 #if MXNET_USE_DIST_KVSTORE 38 #endif // MXNET_USE_DIST_KVSTORE 74 static KVStore *Create(
const char *type =
"local");
79 inline const std::string&
type() {
return type_; }
86 virtual void SetGradientCompression(
const std::vector<std::pair<std::string, std::string> >
105 virtual void Init(
const std::vector<int>& keys,
106 const std::vector<NDArray>& values) = 0;
112 virtual void Init(
const std::vector<std::string>& str_keys,
113 const std::vector<NDArray>& values) = 0;
150 virtual void Push(
const std::vector<int>& keys,
151 const std::vector<NDArray>& values,
152 int priority = 0) = 0;
160 virtual void Push(
const std::vector<std::string>& str_keys,
161 const std::vector<NDArray>& values,
162 int priority = 0) = 0;
187 virtual void Pull(
const std::vector<int>& keys,
188 const std::vector<NDArray*>& values,
189 int priority = 0,
bool ignore_sparse =
true) = 0;
197 virtual void Pull(
const std::vector<std::string>& str_keys,
198 const std::vector<NDArray*>& values,
199 int priority = 0,
bool ignore_sparse =
true) = 0;
209 virtual void PushPull(
const std::vector<int>& vkeys,
210 const std::vector<int>& okeys,
211 const std::vector<NDArray>& values,
212 const std::vector<NDArray*>& outs,
213 int priority = 0) = 0;
223 virtual void PushPull(
const std::vector<std::string>& str_vkeys,
224 const std::vector<std::string>& str_okeys,
225 const std::vector<NDArray>& values,
226 const std::vector<NDArray*>& outs,
227 int priority = 0) = 0;
236 virtual void PullRowSparse(
const std::vector<int>& str_keys,
237 const std::vector<std::pair<NDArray*, NDArray>>& val_rowids,
238 int priority = 0) = 0;
248 virtual void PullRowSparse(
const std::vector<std::string>& str_keys,
249 const std::vector<std::pair<NDArray*, NDArray>>& val_rowids,
250 int priority = 0) = 0;
255 typedef std::function<void(int, const NDArray&, NDArray*)>
Updater;
259 typedef std::function<void(const std::string&, const NDArray&, NDArray*)>
StrUpdater;
270 CHECK(updater) <<
"invalid updater";
284 CHECK(updater) <<
"invalid updater";
285 str_updater_ = updater;
296 static void InitPSEnv(
const std::unordered_map<std::string, std::string>& envs) {
297 #if MXNET_USE_DIST_KVSTORE 298 ps::Environment::Init(envs);
300 LOG(FATAL) <<
"compile with USE_DIST_KVSTORE=1 to init parameter server's environment";
301 #endif // MXNET_USE_DIST_KVSTORE 310 #if MXNET_USE_DIST_KVSTORE 311 const char* role_str = ps::Environment::Get()->find(
"DMLC_ROLE");
312 return (role_str ==
nullptr) || (!strcmp(role_str,
"worker"));
315 #endif // MXNET_USE_DIST_KVSTORE 324 #if MXNET_USE_DIST_KVSTORE 325 const char* role_str = ps::Environment::Get()->find(
"DMLC_ROLE");
326 return (role_str !=
nullptr) && (!strcmp(role_str,
"server"));
329 #endif // MXNET_USE_DIST_KVSTORE 333 #if MXNET_USE_DIST_KVSTORE 334 if (!IsWorkerNode()) LOG(FATAL) <<
"barrier_before_exit takes effect only on worker nodes";
335 barrier_before_exit_ = barrier_before_exit;
337 LOG(FATAL) <<
"compile with USE_DIST_KVSTORE=1 to enable barrier";
347 #if MXNET_USE_DIST_KVSTORE 348 const char* role_str = ps::Environment::Get()->find(
"DMLC_ROLE");
349 return (role_str !=
nullptr) && (!strcmp(role_str,
"scheduler"));
352 #endif // MXNET_USE_DIST_KVSTORE 413 const std::string& params) {
414 LOG(INFO) <<
"Unable to pass server the profiler command. If you are using " 415 <<
"distributed kvstore, you need to compile with USE_DIST_KVSTORE=1." 416 <<
"If you are training on single machine, then there is no server process" 417 <<
"to profile. Please profile the worker process instead.";
/*!
 * \brief the prototype of a server controller
 *
 * A callable taking (int, const std::string&) and returning nothing.
 */
using Controller = std::function<void(int, const std::string&)>;
438 virtual void RunServer(
const Controller& controller) { }
465 std::atomic<bool> barrier_before_exit_{
true};
469 #endif // MXNET_KVSTORE_H_ distributed key-value store
Definition: kvstore.h:59
std::function< void(int, const NDArray &, NDArray *)> Updater
the prototype of user-defined updater
Definition: kvstore.h:255
namespace of mxnet
Definition: base.h:89
virtual int get_rank() const
Definition: kvstore.h:361
virtual void set_updater(const StrUpdater &updater)
set an updater with string keys
Definition: kvstore.h:283
Updater updater_
the user-defined updater
Definition: kvstore.h:444
const std::string & type()
return the type
Definition: kvstore.h:79
static bool IsSchedulerNode()
Definition: kvstore.h:346
virtual void Barrier()
global barrier among all worker machines
Definition: kvstore.h:391
static void InitPSEnv(const std::unordered_map< std::string, std::string > &envs)
initialize ps-lite environment variables
Definition: kvstore.h:296
static bool IsWorkerNode()
Definition: kvstore.h:309
virtual ~KVStore()
virtual destructor
Definition: kvstore.h:62
void set_barrier_before_exit(const bool barrier_before_exit)
Definition: kvstore.h:332
virtual void SetServerProfilerCommand(const KVStoreServerProfilerCommand type, const std::string &params)
Sends server profiler commands to all server nodes Only the worker with rank=0 sends the command whic...
Definition: kvstore.h:412
StrUpdater str_updater_
the user-defined updater with string keys
Definition: kvstore.h:449
virtual int get_num_dead_node(int node_id, int timeout=60) const
Definition: kvstore.h:380
std::shared_ptr< kvstore::GradientCompression > gradient_compression_
Gradient compression object starts with GC_NONE mode Used if SetGradientCompression sets the type...
Definition: kvstore.h:460
virtual void RunServer(const Controller &controller)
Run as server (or scheduler)
Definition: kvstore.h:438
std::function< void(const std::string &, const NDArray &, NDArray *)> StrUpdater
the prototype of user-defined updater with string keys
Definition: kvstore.h:259
virtual void SendCommandToServers(int cmd_id, const std::string &cmd_body)
Send a command to all server nodes.
Definition: kvstore.h:404
std::string type_
the kvstore type
Definition: kvstore.h:454
std::function< void(int, const std::string &)> Controller
the prototype of a server controller
Definition: kvstore.h:423
virtual void set_updater(const Updater &updater)
set an updater
Definition: kvstore.h:269
virtual int get_group_size() const
Definition: kvstore.h:368
KVStoreServerProfilerCommand
enum to denote types of commands kvstore sends to server regarding profiler kSetConfig sets profiler ...
Definition: kvstore.h:49
static bool IsServerNode()
Definition: kvstore.h:323