mxnet
recordio.h
Go to the documentation of this file.
1 
8 #ifndef DMLC_RECORDIO_H_
9 #define DMLC_RECORDIO_H_
10 #include <cstring>
11 #include <string>
12 #include "./io.h"
13 #include "./logging.h"
14 
15 namespace dmlc {
39  public:
45  static const uint32_t kMagic = 0xced7230a;  // RecordIO magic number; its top 3 bits (kMagic >> 29) are 6, i.e. > 3, so an lrecord header is not expected to equal it.
52  inline static uint32_t EncodeLRec(uint32_t cflag, uint32_t length) {  // Pack a continuation flag and a length into one 32-bit lrecord word.
53  return (cflag << 29U) | length;  // cflag lands in bits 29..31; length is NOT masked, so callers must keep it below 2^29 or it bleeds into the flag bits.
54  }
60  inline static uint32_t DecodeFlag(uint32_t rec) {  // Extract the flag part of an lrecord word (inverse of the cflag half of EncodeLRec).
61  return (rec >> 29U) & 7U;  // Top 3 bits of rec, returned as a value in [0, 7].
62  }
68  inline static uint32_t DecodeLength(uint32_t rec) {  // Extract the length part of an lrecord word.
69  return rec & ((1U << 29U) - 1U);  // Mask keeps the low 29 bits and drops the 3 flag bits.
70  }
75  explicit RecordIOWriter(Stream *stream)  // Construct a writer on top of `stream`; the pointer is stored as-is (no ownership transfer is visible here).
76  : stream_(stream), seek_stream_(dynamic_cast<SeekStream*>(stream)),  // seek_stream_ is null when `stream` is not a SeekStream; Tell() checks for that.
77  except_counter_(0) {  // Counter starts at zero.
78  CHECK(sizeof(uint32_t) == 4) << "uint32_t needs to be 4 bytes";  // Sanity check: the header encoding assumes a 4-byte uint32_t.
79  }
85  void WriteRecord(const void *buf, size_t size);  // Write `size` bytes starting at `buf` to the stream as one record (declaration only; the body is not part of this listing).
90  inline void WriteRecord(const std::string &data) {  // Convenience overload: write the contents of `data` as one record.
91  this->WriteRecord(data.c_str(), data.length());  // Forwards to the (buf, size) overload; length() is used, so embedded NUL bytes are included.
92  }
97  inline size_t except_counter(void) const {  // Accessor for the writer's exception counter.
98  return except_counter_;  // NOTE(review): presumably counts occurrences of kMagic inside written payloads - confirm against the WriteRecord implementation.
99  }
100 
102  inline size_t Tell(void) {  // Report the current position of the underlying stream.
103  CHECK(seek_stream_ != NULL) << "The input stream is not seekable";  // Only valid when the constructor's dynamic_cast to SeekStream succeeded.
104  return seek_stream_->Tell();  // Delegates to SeekStream::Tell().
105  }
106 
107  private:
109  Stream *stream_;  // Stream passed to the constructor.
111  SeekStream *seek_stream_;  // Same stream viewed as a SeekStream, or null if it is not one.
113  size_t except_counter_;  // Initialised to 0 in the constructor; exposed through except_counter().
114 };
120  public:
125  explicit RecordIOReader(Stream *stream)  // Construct a reader on top of `stream`; the pointer is stored as-is (no ownership transfer is visible here).
126  : stream_(stream), seek_stream_(dynamic_cast<SeekStream*>(stream)),  // seek_stream_ is null when `stream` is not a SeekStream; Seek()/Tell() check for that.
127  end_of_stream_(false) {  // Reader starts in the "not at end" state.
128  CHECK(sizeof(uint32_t) == 4) << "uint32_t needs to be 4 bytes";  // Sanity check: the header encoding assumes a 4-byte uint32_t.
129  }
135  bool NextRecord(std::string *out_rec);  // Read the next complete record from the stream into *out_rec (declaration only). NOTE(review): presumably returns false at end of stream - confirm in the implementation.
136 
138  inline void Seek(size_t pos) {  // Move the underlying stream to position `pos`.
139  CHECK(seek_stream_ != NULL) << "The input stream is not seekable";  // Only valid when the constructor's dynamic_cast to SeekStream succeeded.
140  seek_stream_->Seek(pos);  // Delegates to SeekStream::Seek(); end_of_stream_ is not touched here.
141  }
142 
144  inline size_t Tell(void) {  // Report the current position of the input stream.
145  CHECK(seek_stream_ != NULL) << "The input stream is not seekable";  // Only valid when the constructor's dynamic_cast to SeekStream succeeded.
146  return seek_stream_->Tell();  // Delegates to SeekStream::Tell().
147  }
148 
149  private:
151  Stream *stream_;  // Stream passed to the constructor.
152  SeekStream *seek_stream_;  // Same stream viewed as a SeekStream, or null if it is not one.
154  bool end_of_stream_;  // Initialised to false; NOTE(review): presumably set by NextRecord once the stream is exhausted - confirm.
155 };
156 
167  public:
174  explicit RecordIOChunkReader(InputSplit::Blob chunk,  // Construct a reader over an in-memory blob (declaration only; the body is not part of this listing).
175  unsigned part_index = 0,  // NOTE(review): presumably selects which of the num_parts sub-ranges of the chunk to read - confirm.
176  unsigned num_parts = 1);  // Defaults (0 of 1) presumably mean "read the whole chunk" - confirm.
186  bool NextRecord(InputSplit::Blob *out_rec);  // Read the next complete record; *out_rec receives a blob describing the record's memory (declaration only). NOTE(review): lifetime of the returned memory is not visible here - confirm.
187 
188  private:
190  std::string temp_;  // NOTE(review): presumably scratch storage used when a record has to be reassembled - confirm.
192  char *pbegin_, *pend_;  // NOTE(review): presumably the current read cursor and the end of the chunk - confirm.
193 };
194 
195 } // namespace dmlc
196 #endif // DMLC_RECORDIO_H_
dmlc::RecordIOWriter::kMagic
static const uint32_t kMagic
Magic number of RecordIO. Note: ((kMagic >> 29U) & 7) > 3; this ensures that an lrecord will never be equal to kMagic.
Definition: recordio.h:45
dmlc::RecordIOWriter::except_counter
size_t except_counter(void) const
Definition: recordio.h:97
dmlc::RecordIOWriter::Tell
size_t Tell(void)
Tell the current position of the underlying stream (requires the stream to be seekable).
Definition: recordio.h:102
dmlc::RecordIOReader::RecordIOReader
RecordIOReader(Stream *stream)
constructor
Definition: recordio.h:125
dmlc::RecordIOReader
Reader of binary RecordIO that reads records from a stream.
Definition: recordio.h:119
dmlc
namespace for dmlc
Definition: array_view.h:12
dmlc::InputSplit::Blob
a blob of memory region
Definition: io.h:158
dmlc::SeekStream::Tell
virtual size_t Tell(void)=0
tell the position of the stream
dmlc::RecordIOChunkReader
reader of binary recordio from Blob returned by InputSplit This class divides the blob into several i...
Definition: recordio.h:166
dmlc::RecordIOWriter::EncodeLRec
static uint32_t EncodeLRec(uint32_t cflag, uint32_t length)
encode the lrecord
Definition: recordio.h:52
dmlc::RecordIOWriter::WriteRecord
void WriteRecord(const void *buf, size_t size)
write record to the stream
dmlc::RecordIOWriter::RecordIOWriter
RecordIOWriter(Stream *stream)
constructor
Definition: recordio.h:75
dmlc::SeekStream
interface of i/o stream that support seek
Definition: io.h:109
dmlc::RecordIOReader::Tell
size_t Tell(void)
tell the current position of the input stream
Definition: recordio.h:144
io.h
defines serializable interface of dmlc
dmlc::RecordIOWriter::DecodeLength
static uint32_t DecodeLength(uint32_t rec)
decode the length part of lrecord
Definition: recordio.h:68
dmlc::RecordIOWriter::WriteRecord
void WriteRecord(const std::string &data)
write record to the stream
Definition: recordio.h:90
dmlc::RecordIOWriter::DecodeFlag
static uint32_t DecodeFlag(uint32_t rec)
decode the flag part of lrecord
Definition: recordio.h:60
dmlc::RecordIOWriter
Writer of binary RecordIO. Binary format of a RecordIO entry: magic, lrecord, data, pad.
Definition: recordio.h:38
dmlc::RecordIOReader::NextRecord
bool NextRecord(std::string *out_rec)
read next complete record from stream
dmlc::RecordIOReader::Seek
void Seek(size_t pos)
seek to certain position of the input stream
Definition: recordio.h:138
dmlc::RecordIOChunkReader::NextRecord
bool NextRecord(InputSplit::Blob *out_rec)
read next complete record from stream the blob contains the memory content NOTE: this function is not...
dmlc::Stream
interface of stream I/O for serialization
Definition: io.h:30
dmlc::RecordIOChunkReader::RecordIOChunkReader
RecordIOChunkReader(InputSplit::Blob chunk, unsigned part_index=0, unsigned num_parts=1)
constructor
dmlc::SeekStream::Seek
virtual void Seek(size_t pos)=0
seek to certain position of the file