mxnet
recordio.h
Go to the documentation of this file.
1 
8 #ifndef DMLC_RECORDIO_H_
9 #define DMLC_RECORDIO_H_
10 #include <cstring>
11 #include <string>
12 #include "./io.h"
13 #include "./logging.h"
14 
15 namespace dmlc {
39  public:
// Magic number of the recordio format.
// Its top three bits, (kMagic >> 29U) & 7, are 6 (> 3); per the upstream note
// this ensures an encoded lrecord word will not be equal to kMagic.
45  static const uint32_t kMagic = 0xced7230a;
// Encode the lrecord: pack a flag and a length into a single 32-bit word.
//   cflag  - flag value; shifted left by 29 so it lands in bits 29..31
//   length - length value; OR-ed into the word unchanged
// Returns the combined word. No masking is done here, so a length that does
// not fit in the low 29 bits would overlap the flag bits.
52  inline static uint32_t EncodeLRec(uint32_t cflag, uint32_t length) {
53  return (cflag << 29U) | length;
54  }
// Decode the flag part of an lrecord word.
//   rec - the 32-bit lrecord word
// Returns bits 29..31 of rec as a value in [0, 7].
60  inline static uint32_t DecodeFlag(uint32_t rec) {
61  return (rec >> 29U) & 7U;
62  }
// Decode the length part of an lrecord word.
//   rec - the 32-bit lrecord word
// Returns the low 29 bits of rec (the three flag bits are masked off).
68  inline static uint32_t DecodeLength(uint32_t rec) {
69  return rec & ((1U << 29U) - 1U);
70  }
// Constructor.
//   stream - the stream this writer keeps in stream_
// seek_stream_ is obtained with dynamic_cast<SeekStream*>, so it is null when
// `stream` is not a SeekStream; except_counter_ starts at 0.
75  explicit RecordIOWriter(Stream *stream)
76  : stream_(stream), seek_stream_(dynamic_cast<SeekStream*>(stream)),
77  except_counter_(0) {
// The lrecord encoding relies on uint32_t being exactly 4 bytes.
78  CHECK(sizeof(uint32_t) == 4) << "uint32_t needs to be 4 bytes";
79  }
// Write record to the stream (declaration only; the definition is not in this header).
// NOTE(review): presumably `buf` points at the record bytes and `size` is their
// byte count - confirm against the implementation.
85  void WriteRecord(const void *buf, size_t size);
// Write record to the stream: convenience overload for std::string.
//   data - the record; forwarded as (data.c_str(), data.length()) to the
//          (const void*, size_t) overload
90  inline void WriteRecord(const std::string &data) {
91  this->WriteRecord(data.c_str(), data.length());
92  }
// Returns the current value of except_counter_.
// NOTE(review): what the counter counts is not visible in this listing - the
// code that increments it lives elsewhere; confirm before relying on it.
97  inline size_t except_counter(void) const {
98  return except_counter_;
99  }
100 
// Tell the current position of the underlying stream.
// Requires a seekable stream: CHECKs that seek_stream_ is non-null, then
// returns seek_stream_->Tell().
102  inline size_t Tell(void) {
103  CHECK(seek_stream_ != NULL) << "The input stream is not seekable";
104  return seek_stream_->Tell();
105  }
106 
107  private:
// The stream passed to the constructor.
109  Stream *stream_;
// The same stream viewed as a SeekStream; null if the dynamic_cast in the
// constructor failed (stream is not seekable).
111  SeekStream *seek_stream_;
// Counter initialised to 0 in the constructor and exposed by except_counter().
// NOTE(review): its exact meaning is not visible in this listing.
113  size_t except_counter_;
114 };
120  public:
// Constructor.
//   stream - the stream this reader keeps in stream_
// seek_stream_ is obtained with dynamic_cast<SeekStream*>, so it is null when
// `stream` is not a SeekStream; end_of_stream_ starts as false.
125  explicit RecordIOReader(Stream *stream)
126  : stream_(stream), seek_stream_(dynamic_cast<SeekStream*>(stream)),
127  end_of_stream_(false) {
// The lrecord encoding relies on uint32_t being exactly 4 bytes.
128  CHECK(sizeof(uint32_t) == 4) << "uint32_t needs to be 4 bytes";
129  }
// Read the next record (declaration only; the definition is not in this header).
// NOTE(review): presumably stores the record in *out_rec and returns false once
// no more records are available - confirm against the implementation.
135  bool NextRecord(std::string *out_rec);
136 
// Seek to a certain position of the input stream.
//   pos - position forwarded to seek_stream_->Seek()
// Requires a seekable stream: CHECKs that seek_stream_ is non-null first.
138  inline void Seek(size_t pos) {
139  CHECK(seek_stream_ != NULL) << "The input stream is not seekable";
140  seek_stream_->Seek(pos);
141  }
142 
// Tell the current position of the input stream.
// Requires a seekable stream: CHECKs that seek_stream_ is non-null, then
// returns seek_stream_->Tell().
144  inline size_t Tell(void) {
145  CHECK(seek_stream_ != NULL) << "The input stream is not seekable";
146  return seek_stream_->Tell();
147  }
148 
149  private:
// The stream passed to the constructor.
151  Stream *stream_;
// The same stream viewed as a SeekStream; null if the dynamic_cast in the
// constructor failed (stream is not seekable).
152  SeekStream *seek_stream_;
// Initialised to false in the constructor.
// NOTE(review): presumably set once the stream is exhausted - confirm in the implementation.
154  bool end_of_stream_;
155 };
156 
167  public:
// Constructor (declaration only; the definition is not in this header).
//   chunk      - the blob of memory to read records from
//   part_index - defaults to 0
//   num_parts  - defaults to 1
// NOTE(review): presumably the blob is divided into num_parts pieces and this
// reader handles piece part_index - confirm against the implementation.
174  explicit RecordIOChunkReader(InputSplit::Blob chunk,
175  unsigned part_index = 0,
176  unsigned num_parts = 1);
// Read the next record of the chunk (declaration only; definition not in this header).
// NOTE(review): presumably fills *out_rec with the record's memory region and
// returns false when the chunk has no more records - confirm against the implementation.
186  bool NextRecord(InputSplit::Blob *out_rec);
187 
188  private:
// NOTE(review): presumably a scratch buffer for assembling a record - usage is
// not visible in this listing; confirm.
190  std::string temp_;
// NOTE(review): presumably the begin/end cursors over the chunk being read - confirm.
192  char *pbegin_, *pend_;
193 };
194 
195 } // namespace dmlc
196 #endif // DMLC_RECORDIO_H_
reader of binary recordio that reads records from a stream
Definition: recordio.h:119
RecordIOWriter(Stream *stream)
constructor
Definition: recordio.h:75
size_t Tell(void)
tell the current position of the input stream
Definition: recordio.h:144
a blob of memory region
Definition: io.h:158
void WriteRecord(const std::string &data)
write record to the stream
Definition: recordio.h:90
static uint32_t DecodeLength(uint32_t rec)
decode the length part of lrecord
Definition: recordio.h:68
size_t except_counter(void) const
Definition: recordio.h:97
writer of binary recordio. Binary format for recordio: magic lrecord data pad ...
Definition: recordio.h:38
static uint32_t DecodeFlag(uint32_t rec)
decode the flag part of lrecord
Definition: recordio.h:60
void Seek(size_t pos)
seek to a certain position of the input stream
Definition: recordio.h:138
interface of stream I/O for serialization
Definition: io.h:30
static const uint32_t kMagic
magic number of recordio. Note: (kMagic >> 29U) & 7 > 3; this ensures lrec will not be kMagic ...
Definition: recordio.h:45
virtual size_t Tell(void)=0
tell the position of the stream
size_t Tell(void)
tell the current position of the input stream
Definition: recordio.h:102
namespace for dmlc
Definition: array_view.h:12
RecordIOReader(Stream *stream)
constructor
Definition: recordio.h:125
interface of an i/o stream that supports seek
Definition: io.h:109
void WriteRecord(const void *buf, size_t size)
write record to the stream
reader of binary recordio from a Blob returned by InputSplit. This class divides the blob into several i...
Definition: recordio.h:166
static uint32_t EncodeLRec(uint32_t cflag, uint32_t length)
encode the lrecord
Definition: recordio.h:52