Go to the documentation of this file.
15 #include "./logging.h"
19 #define __DMLC_COMMA ,
55 template<
typename DType>
63 virtual bool Next(
void) = 0;
65 virtual const DType &
Value(
void)
const = 0;
73 template<
typename IndexType,
typename DType = real_t>
117 return value == NULL ? DType(1.0f) :
value[i];
137 return qid == NULL ? 0 : *
qid;
148 V sum =
static_cast<V
>(0);
150 for (
size_t i = 0; i <
length; ++i) {
151 CHECK(
index[i] < size) <<
"feature index exceed bound";
155 for (
size_t i = 0; i <
length; ++i) {
156 CHECK(
index[i] < size) <<
"feature index exceed bound";
174 template<
typename IndexType,
typename DType = real_t>
200 size_t cost =
size * (
sizeof(size_t) +
sizeof(DType));
202 if (
qid != NULL) cost +=
size *
sizeof(size_t);
204 if (
field != NULL) cost += ndata *
sizeof(IndexType);
205 if (
index != NULL) cost += ndata *
sizeof(IndexType);
206 if (
value != NULL) cost += ndata *
sizeof(DType);
216 CHECK(begin <= end && end <=
size);
218 ret.
size = end - begin;
253 template<
typename IndexType,
typename DType = real_t>
273 virtual size_t NumCol()
const = 0;
292 template <
typename IndexType,
typename DType = real_t>
313 virtual size_t BytesRead(
void)
const = 0;
316 (
const std::string& path,
317 const std::map<std::string, std::string>& args,
327 template<
typename IndexType,
typename DType = real_t>
330 typename Parser<IndexType, DType>::Factory> {};
358 #define DMLC_REGISTER_DATA_PARSER(IndexType, DataType, TypeName, FactoryFunction) \
359 DMLC_REGISTRY_REGISTER(ParserFactoryReg<IndexType __DMLC_COMMA DataType>, \
360 ParserFactoryReg ## _ ## IndexType ## _ ## DataType, TypeName) \
361 .set_body(FactoryFunction)
365 template<
typename IndexType,
typename DType>
366 inline Row<IndexType, DType>
370 inst.
label = label + rowid;
371 if (weight != NULL) {
372 inst.
weight = weight + rowid;
377 inst.
qid = qid + rowid;
381 inst.
length = offset[rowid + 1] - offset[rowid];
383 inst.
field = field + offset[rowid];
387 inst.
index = index + offset[rowid];
391 inst.
value = value + offset[rowid];
397 #endif // DMLC_DATA_H_
const IndexType * index
feature index
Definition: data.h:189
DType get_label() const
Definition: data.h:122
virtual const DType & Value(void) const =0
get current data
Common base class for function registry.
Definition: registry.h:151
const DType * value
feature value, can be NULL, indicating all values are 1
Definition: data.h:191
namespace for dmlc
Definition: array_view.h:12
IndexType get_index(size_t i) const
Definition: data.h:108
const IndexType * field
field id
Definition: data.h:187
const DType * label
array[size] label of each instance
Definition: data.h:181
const real_t * weight
With weight: array[size] weight of each instance, otherwise nullptr.
Definition: data.h:183
const DType * label
label of the instance
Definition: data.h:77
RowBlock Slice(size_t begin, size_t end) const
slice a RowBlock to get rows in [begin, end)
Definition: data.h:215
real_t get_weight() const
Definition: data.h:129
uint64_t get_qid() const
Definition: data.h:136
const IndexType * field
field of each instance
Definition: data.h:87
defines configuration macros
virtual size_t BytesRead(void) const =0
virtual size_t NumCol() const =0
const uint64_t * qid
session-id of the instance
Definition: data.h:81
virtual void BeforeFirst(void)=0
set before first of the item
virtual ~DataIter(void) DMLC_THROW_EXCEPTION
destructor
Definition: data.h:59
one row of training instance
Definition: data.h:74
static Parser< IndexType, DType > * Create(const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
create a new instance of parser based on the "type"
unsigned index_t
this defines the unsigned integer type that can normally be used to store feature index
Definition: data.h:32
const uint64_t * qid
With qid: array[size] session id of each instance, otherwise nullptr.
Definition: data.h:185
size_t MemCostBytes(void) const
Definition: data.h:199
virtual bool Next(void)=0
move to next item
registry entry of parser factory
Definition: data.h:328
const IndexType * index
index of each instance
Definition: data.h:91
defines serializable interface of dmlc
Row< IndexType, DType > operator[](size_t rowid) const
get specific rows in the batch
Definition: data.h:367
data iterator interface; this is not a C++ style iterator, but is convenient for data pulling :) This interface ...
Definition: data.h:56
a block of data, containing several rows in a sparse matrix. This is useful for (streaming-style) algor...
Definition: data.h:175
const size_t * offset
array[size+1], row pointer to beginning of each rows
Definition: data.h:179
size_t length
length of the sparse vector
Definition: data.h:83
V SDot(const V *weight, size_t size) const
helper function to compute the dot product of the current row with the dense array weight of length size
Definition: data.h:147
DType get_value(size_t i) const
Definition: data.h:116
static RowBlockIter< IndexType, DType > * Create(const char *uri, unsigned part_index, unsigned num_parts, const char *type)
create a new instance of iterator that returns rowbatch; by default, an in-memory based iterator will b...
size_t size
batch size
Definition: data.h:177
parser interface that parses input data; used to load dmlc data format into your own data format. Diffe...
Definition: data.h:293
float real_t
this defines the floating-point type that will be used to store feature values
Definition: data.h:26
Registry utility that helps to build registry singletons.
IndexType get_field(size_t i) const
Definition: data.h:101
#define DMLC_THROW_EXCEPTION
Definition: base.h:233
Data structure that holds the data. Row block iterator interface that gets RowBlocks. Difference betwee...
Definition: data.h:254
const real_t * weight
weight of the instance
Definition: data.h:79
const DType * value
array value of each instance, this can be NULL indicating every value is set to be 1
Definition: data.h:96