mxnet
strtonum.h
Go to the documentation of this file.
1 
6 #ifndef DMLC_STRTONUM_H_
7 #define DMLC_STRTONUM_H_
8 
9 #if DMLC_USE_CXX11
10 #include <type_traits>
11 #endif
12 
13 #include <string>
14 #include <limits>
15 #include <cstdint>
16 #include "./base.h"
17 #include "./logging.h"
18 
19 namespace dmlc {
/*!
 * \brief Inline implementation of isspace(). Tests whether the given character
 *        is a whitespace character.
 * \param c Character to test
 * \return true if c is one of ' ', '\\t', '\\r', '\\n' or '\\f'; false otherwise.
 *         Note that the vertical tab '\\v' is not treated as whitespace here.
 */
inline bool isspace(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
    case '\f':
      return true;
    default:
      return false;
  }
}
29 
/*!
 * \brief Inline implementation of isblank(). Tests whether the given character
 *        is a space or a tab character.
 * \param c Character to test
 * \return true if c is ' ' or '\\t'; false otherwise
 */
inline bool isblank(char c) {
  if (c == ' ') {
    return true;
  }
  return c == '\t';
}
39 
/*!
 * \brief Inline implementation of isdigit(). Tests whether the given character
 *        is a decimal digit.
 * \param c Character to test
 * \return true if c lies in the range '0'..'9'; false otherwise
 */
inline bool isdigit(char c) {
  // A digit is anything that is neither below '0' nor above '9'.
  return !(c < '0' || c > '9');
}
49 
/*!
 * \brief Inline implementation of isalpha(). Tests whether the given character
 *        is an alphabet letter.
 * \param c Character to test
 * \return true if c lies in 'a'..'z' or 'A'..'Z'; false otherwise
 */
inline bool isalpha(char c) {
  // The range comparisons below are only valid when letters are contiguous,
  // which is verified here for the upper-case ASCII block.
  static_assert(
      static_cast<int>('A') == 65 && static_cast<int>('Z' - 'A') == 25,
      "Only system with ASCII character set is supported");
  const bool is_lower = ('a' <= c && c <= 'z');
  const bool is_upper = ('A' <= c && c <= 'Z');
  return is_lower || is_upper;
}
62 
/*!
 * \brief Tests whether the given character is a valid letter in the string
 *        representation of a floating-point value, i.e. a decimal digit, a
 *        sign, the decimal point, or an exponent marker.
 * \param c Character to test
 * \return true if c is one of 0-9, '+', '-', '.', 'e' or 'E'; false otherwise
 */
inline bool isdigitchars(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  switch (c) {
    case '+':
    case '-':
    case '.':
    case 'e':
    case 'E':
      return true;
    default:
      return false;
  }
}
76 
81 const int kStrtofMaxDigits = 19;
82 
98 template <typename FloatType, bool CheckRange = false>
99 inline FloatType ParseFloat(const char* nptr, char** endptr) {
100 #if DMLC_USE_CXX11
101  static_assert(std::is_same<FloatType, double>::value
102  || std::is_same<FloatType, float>::value,
103  "ParseFloat is defined only for 'float' and 'double' types");
104  constexpr unsigned kMaxExponent
105  = (std::is_same<FloatType, double>::value ? 308U : 38U);
106  constexpr FloatType kMaxSignificandForMaxExponent
107  = static_cast<FloatType>(std::is_same<FloatType, double>::value
108  ? 1.79769313486231570 : 3.402823466);
109  // If a floating-point value has kMaxExponent, what is
110  // the largest possible significand value?
111  constexpr FloatType kMaxSignificandForNegMaxExponent
112  = static_cast<FloatType>(std::is_same<FloatType, double>::value
113  ? 2.22507385850720139 : 1.175494351);
114  // If a floating-point value has -kMaxExponent, what is
115  // the largest possible significand value?
116 #else
117  const unsigned kMaxExponent
118  = (sizeof(FloatType) == sizeof(double) ? 308U : 38U);
119  const FloatType kMaxSignificandForMaxExponent
120  = static_cast<FloatType>(sizeof(FloatType) == sizeof(double)
121  ? 1.79769313486231570 : 3.402823466);
122  const FloatType kMaxSignificandForNegMaxExponent
123  = static_cast<FloatType>(sizeof(FloatType) == sizeof(double)
124  ? 2.22507385850720139 : 1.175494351);
125 #endif
126 
127  const char *p = nptr;
128  // Skip leading white space, if any. Not necessary
129  while (isspace(*p) ) ++p;
130 
131  // Get sign, if any.
132  bool sign = true;
133  if (*p == '-') {
134  sign = false; ++p;
135  } else if (*p == '+') {
136  ++p;
137  }
138 
139  // Handle INF and NAN
140  {
141  int i = 0;
142  // case-insensitive match for INF and INFINITY
143  while (i < 8 && static_cast<char>((*p) | 32) == "infinity"[i]) {
144  ++i; ++p;
145  }
146  if (i == 3 || i == 8) {
147  if (endptr) *endptr = (char*)p; // NOLINT(*)
148  return sign ? std::numeric_limits<FloatType>::infinity()
149  : -std::numeric_limits<FloatType>::infinity();
150  } else {
151  p -= i;
152  }
153 
154  // case-insensitive match for NAN
155  i = 0;
156  while (i < 3 && static_cast<char>((*p) | 32) == "nan"[i]) {
157  ++i; ++p;
158  }
159  if (i == 3) {
160  // Got NAN; check if the value is of form NAN(char_sequence)
161  if (*p == '(') {
162  ++p;
163  while (isdigit(*p) || isalpha(*p) || *p == '_') ++p;
164  CHECK_EQ(*p, ')') << "Invalid NAN literal";
165  ++p;
166  }
167  static_assert(std::numeric_limits<FloatType>::has_quiet_NaN,
168  "Only system with quiet NaN is supported");
169  if (endptr) *endptr = (char*)p; // NOLINT(*)
170  return std::numeric_limits<FloatType>::quiet_NaN();
171  } else {
172  p -= i;
173  }
174  }
175 
176  // Get digits before decimal point or exponent, if any.
177  uint64_t predec; // to store digits before decimal point
178  for (predec = 0; isdigit(*p); ++p) {
179  predec = predec * 10ULL + static_cast<uint64_t>(*p - '0');
180  }
181  FloatType value = static_cast<FloatType>(predec);
182 
183  // Get digits after decimal point, if any.
184  if (*p == '.') {
185  uint64_t pow10 = 1;
186  uint64_t val2 = 0;
187  int digit_cnt = 0;
188  ++p;
189  while (isdigit(*p)) {
190  if (digit_cnt < kStrtofMaxDigits) {
191  val2 = val2 * 10ULL + static_cast<uint64_t>(*p - '0');
192  pow10 *= 10ULL;
193  } // when kStrtofMaxDigits is read, ignored following digits
194  ++p;
195  ++digit_cnt;
196  }
197  value += static_cast<FloatType>(
198  static_cast<double>(val2) / static_cast<double>(pow10));
199  }
200 
201  // Handle exponent, if any.
202  if ((*p == 'e') || (*p == 'E')) {
203  ++p;
204  bool frac = false;
205  FloatType scale = static_cast<FloatType>(1.0f);
206  unsigned expon;
207  // Get sign of exponent, if any.
208  if (*p == '-') {
209  frac = true;
210  ++p;
211  } else if (*p == '+') {
212  ++p;
213  }
214  // Get digits of exponent, if any.
215  for (expon = 0; isdigit(*p); ++p) {
216  expon = expon * 10U + static_cast<unsigned>(*p - '0');
217  }
218  if (expon > kMaxExponent) { // out of range, clip or raise error
219  if (CheckRange) {
220  errno = ERANGE;
221  if (endptr) *endptr = (char*)p; // NOLINT(*)
222  return std::numeric_limits<FloatType>::infinity();
223  } else {
224  expon = kMaxExponent;
225  }
226  }
227  // handle edge case where exponent is exactly kMaxExponent
228  if (expon == kMaxExponent
229  && ((!frac && value > kMaxSignificandForMaxExponent)
230  || (frac && value < kMaxSignificandForNegMaxExponent))) {
231  if (CheckRange) {
232  errno = ERANGE;
233  if (endptr) *endptr = (char*)p; // NOLINT(*)
234  return std::numeric_limits<FloatType>::infinity();
235  } else {
236  value = (frac ? kMaxSignificandForNegMaxExponent
237  : kMaxSignificandForMaxExponent);
238  }
239  }
240  // Calculate scaling factor.
241  while (expon >= 8U) { scale *= static_cast<FloatType>(1E8f); expon -= 8U; }
242  while (expon > 0U) { scale *= static_cast<FloatType>(10.0f); expon -= 1U; }
243  // Return signed and scaled floating point result.
244  value = frac ? (value / scale) : (value * scale);
245  }
246  // Consume 'f' suffix, if any
247  if (*p == 'f' || *p == 'F') {
248  ++p;
249  }
250 
251  if (endptr) *endptr = (char*)p; // NOLINT(*)
252  return sign ? value : - value;
253 }
254 
268 inline float strtof(const char* nptr, char** endptr) {
269  return ParseFloat<float>(nptr, endptr);
270 }
271 
286 inline float strtof_check_range(const char* nptr, char** endptr) {
287  return ParseFloat<float, true>(nptr, endptr);
288 }
289 
303 inline double strtod(const char* nptr, char** endptr) {
304  return ParseFloat<double>(nptr, endptr);
305 }
306 
321 inline double strtod_check_range(const char* nptr, char** endptr) {
322  return ParseFloat<double, true>(nptr, endptr);
323 }
324 
336 template <typename SignedIntType>
337 inline SignedIntType ParseSignedInt(const char* nptr, char** endptr, int base) {
338 #ifdef DMLC_USE_CXX11
339  static_assert(std::is_signed<SignedIntType>::value
340  && std::is_integral<SignedIntType>::value,
341  "ParseSignedInt is defined for signed integers only");
342 #endif
343  CHECK(base <= 10 && base >= 2);
344  const char* p = nptr;
345  // Skip leading white space, if any. Not necessary
346  while (isspace(*p) ) ++p;
347 
348  // Get sign if any
349  bool sign = true;
350  if (*p == '-') {
351  sign = false; ++p;
352  } else if (*p == '+') {
353  ++p;
354  }
355 
356  SignedIntType value;
357  const SignedIntType base_val = static_cast<SignedIntType>(base);
358  for (value = 0; isdigit(*p); ++p) {
359  value = value * base_val + static_cast<SignedIntType>(*p - '0');
360  }
361 
362  if (endptr) *endptr = (char*)p; // NOLINT(*)
363  return sign ? value : - value;
364 }
365 
377 template <typename UnsignedIntType>
378 inline UnsignedIntType ParseUnsignedInt(const char* nptr, char** endptr, int base) {
379 #ifdef DMLC_USE_CXX11
380  static_assert(std::is_unsigned<UnsignedIntType>::value
381  && std::is_integral<UnsignedIntType>::value,
382  "ParseUnsignedInt is defined for unsigned integers only");
383 #endif
384  CHECK(base <= 10 && base >= 2);
385  const char *p = nptr;
386  // Skip leading white space, if any. Not necessary
387  while (isspace(*p)) ++p;
388 
389  // Get sign if any
390  bool sign = true;
391  if (*p == '-') {
392  sign = false; ++p;
393  } else if (*p == '+') {
394  ++p;
395  }
396 
397  // we are parsing unsigned, so no minus sign should be found
398  CHECK_EQ(sign, true);
399 
400  UnsignedIntType value;
401  const UnsignedIntType base_val = static_cast<UnsignedIntType>(base);
402  for (value = 0; isdigit(*p); ++p) {
403  value = value * base_val + static_cast<UnsignedIntType>(*p - '0');
404  }
405 
406  if (endptr) *endptr = (char*)p; // NOLINT(*)
407  return value;
408 }
409 
422 inline uint64_t strtoull(const char* nptr, char **endptr, int base) {
423  return ParseUnsignedInt<uint64_t>(nptr, endptr, base);
424 }
425 
434 inline long atol(const char* p) { // NOLINT(*)
435  return ParseSignedInt<long>(p, 0, 10); // NOLINT(*)
436 }
437 
448 inline float atof(const char* nptr) {
449  return strtof(nptr, 0);
450 }
451 
467 inline float stof(const std::string& value, size_t* pos = nullptr) {
468  const char* str_source = value.c_str();
469  char* endptr;
470  const float parsed_value = dmlc::strtof_check_range(str_source, &endptr);
471  if (errno == ERANGE && parsed_value == std::numeric_limits<float>::infinity()) {
472  throw std::out_of_range("Out of range value");
473  } else if (const_cast<const char*>(endptr) == str_source) {
474  throw std::invalid_argument("No conversion could be performed");
475  }
476  if (pos) {
477  *pos = static_cast<size_t>(const_cast<const char*>(endptr) - str_source);
478  }
479  return parsed_value;
480 }
481 
497 inline double stod(const std::string& value, size_t* pos = nullptr) {
498  const char* str_source = value.c_str();
499  char* endptr;
500  const double parsed_value = dmlc::strtod_check_range(str_source, &endptr);
501  if (errno == ERANGE && parsed_value == std::numeric_limits<double>::infinity()) {
502  throw std::out_of_range("Out of range value");
503  } else if (const_cast<const char*>(endptr) == str_source) {
504  throw std::invalid_argument("No conversion could be performed");
505  }
506  if (pos) {
507  *pos = static_cast<size_t>(const_cast<const char*>(endptr) - str_source);
508  }
509  return parsed_value;
510 }
511 
/*!
 * \brief Interface class that defines a single method get() to convert a
 *        string into type T. Define a template specialization of this class
 *        to provide the conversion for a particular type.
 * \tparam T Type to convert the string into
 */
template <typename T>
class Str2T {
 public:
  /*!
   * \brief Convert a string into type T.
   * \param begin Beginning of the string to convert
   * \param end End of the string to convert
   * \return The converted value
   */
  static inline T get(const char* begin, const char* end);
};
529 
537 template<typename T>
538 inline T Str2Type(const char * begin, const char * end) {
539  return Str2T<T>::get(begin, end);
540 }
541 
545 template<>
546 class Str2T<int32_t> {
547  public:
554  static inline int32_t get(const char * begin, const char * end) {
555  return ParseSignedInt<int32_t>(begin, NULL, 10);
556  }
557 };
558 
562 template<>
563 class Str2T<uint32_t> {
564  public:
571  static inline uint32_t get(const char* begin, const char* end) {
572  return ParseUnsignedInt<uint32_t>(begin, NULL, 10);
573  }
574 };
575 
579 template<>
580 class Str2T<int64_t> {
581  public:
588  static inline int64_t get(const char * begin, const char * end) {
589  return ParseSignedInt<int64_t>(begin, NULL, 10);
590  }
591 };
592 
596 template<>
597 class Str2T<uint64_t> {
598  public:
605  static inline uint64_t get(const char * begin, const char * end) {
606  return ParseUnsignedInt<uint64_t>(begin, NULL, 10);
607  }
608 };
609 
613 template<>
614 class Str2T<float> {
615  public:
622  static inline float get(const char * begin, const char * end) {
623  return atof(begin);
624  }
625 };
626 
630 template<>
631 class Str2T<double> {
632  public:
639  static inline double get(const char * begin, const char * end) {
640  return strtod(begin, 0);
641  }
642 };
643 
655 template<typename T1, typename T2>
656 inline int ParsePair(const char * begin, const char * end,
657  const char ** endptr, T1 &v1, T2 &v2) { // NOLINT(*)
658  const char * p = begin;
659  while (p != end && !isdigitchars(*p)) ++p;
660  if (p == end) {
661  *endptr = end;
662  return 0;
663  }
664  const char * q = p;
665  while (q != end && isdigitchars(*q)) ++q;
666  v1 = Str2Type<T1>(p, q);
667  p = q;
668  while (p != end && isblank(*p)) ++p;
669  if (p == end || *p != ':') {
670  // only v1
671  *endptr = p;
672  return 1;
673  }
674  p++;
675  while (p != end && !isdigitchars(*p)) ++p;
676  q = p;
677  while (q != end && isdigitchars(*q)) ++q;
678  *endptr = q;
679  v2 = Str2Type<T2>(p, q);
680  return 2;
681 }
682 
696 template<typename T1, typename T2, typename T3>
697 inline int ParseTriple(const char * begin, const char * end,
698  const char ** endptr, T1 &v1, T2 &v2, T3 &v3) { // NOLINT(*)
699  const char * p = begin;
700  while (p != end && !isdigitchars(*p)) ++p;
701  if (p == end) {
702  *endptr = end;
703  return 0;
704  }
705  const char * q = p;
706  while (q != end && isdigitchars(*q)) ++q;
707  v1 = Str2Type<T1>(p, q);
708  p = q;
709  while (p != end && isblank(*p)) ++p;
710  if (p == end || *p != ':') {
711  // only v1
712  *endptr = p;
713  return 1;
714  }
715  p++;
716  while (p != end && !isdigitchars(*p)) ++p;
717  q = p;
718  while (q != end && isdigitchars(*q)) ++q;
719  v2 = Str2Type<T2>(p, q);
720  p = q;
721  while (p != end && isblank(*p)) ++p;
722  if (p == end || *p != ':') {
723  // only v1:v2
724  *endptr = p;
725  return 2;
726  }
727  p++;
728  while (p != end && !isdigitchars(*p)) ++p;
729  q = p;
730  while (q != end && isdigitchars(*q)) ++q;
731  *endptr = q;
732  v3 = Str2Type<T3>(p, q);
733  return 3;
734 }
735 } // namespace dmlc
736 
737 #endif // DMLC_STRTONUM_H_
FloatType ParseFloat(const char *nptr, char **endptr)
Common implementation for dmlc::strtof() and dmlc::strtod() TODO: the current version does not suppor...
Definition: strtonum.h:99
double strtod_check_range(const char *nptr, char **endptr)
A faster implementation of strtod(). See documentation of std::strtod() for more information. This function will check for overflow. If the converted value is outside the range for the double type, errno is set to ERANGE and HUGE_VAL is returned. TODO: the current version does not support hex number TODO: the current version does not handle long decimals: you may only have up to 19 digits after the decimal point, and you cannot have too many digits before the decimal point either.
Definition: strtonum.h:321
double stod(const std::string &value, size_t *pos=nullptr)
A faster implementation of stod(). See documentation of std::stod() for more information. This function will test for overflow and invalid arguments. TODO: the current version does not support hex number TODO: the current version does not handle long decimals: you may only have up to 19 digits after the decimal point, and you cannot have too many digits before the decimal point either.
Definition: strtonum.h:497
UnsignedIntType ParseUnsignedInt(const char *nptr, char **endptr, int base)
A fast string-to-integer convertor, for unsigned integers TODO: the current version supports only bas...
Definition: strtonum.h:378
Interface class that defines a single method get() to convert a string into type T. Define template specialization of this class to define the conversion method for a particular type.
Definition: strtonum.h:519
bool isspace(char c)
Inline implementation of isspace(). Tests whether the given character is a whitespace letter...
Definition: strtonum.h:26
namespace for dmlc
Definition: array_view.h:12
SignedIntType ParseSignedInt(const char *nptr, char **endptr, int base)
A fast string-to-integer convertor, for signed integers TODO: the current version supports only base ...
Definition: strtonum.h:337
float stof(const std::string &value, size_t *pos=nullptr)
A faster implementation of stof(). See documentation of std::stof() for more information. This function will test for overflow and invalid arguments. TODO: the current version does not support hex number TODO: the current version does not handle long decimals: you may only have up to 19 digits after the decimal point, and you cannot have too many digits before the decimal point either.
Definition: strtonum.h:467
float strtof_check_range(const char *nptr, char **endptr)
A faster implementation of strtof(). See documentation of std::strtof() for more information. This function will check for overflow. If the converted value is outside the range for the float type, errno is set to ERANGE and HUGE_VALF is returned. TODO: the current version does not support hex number TODO: the current version does not handle long decimals: you may only have up to 19 digits after the decimal point, and you cannot have too many digits before the decimal point either.
Definition: strtonum.h:286
uint64_t strtoull(const char *nptr, char **endptr, int base)
A faster implementation of strtoull(). See documentation of std::strtoull() for more information...
Definition: strtonum.h:422
bool isdigitchars(char c)
Tests whether the given character is a valid letter in the string representation of a floating-point ...
Definition: strtonum.h:70
const int kStrtofMaxDigits
Maximum number of decimal digits dmlc::strtof() / dmlc::strtod() will process. Trailing digits will b...
Definition: strtonum.h:81
int ParsePair(const char *begin, const char *end, const char **endptr, T1 &v1, T2 &v2)
Parse colon separated pair v1[:v2].
Definition: strtonum.h:656
bool isblank(char c)
Inline implementation of isblank(). Tests whether the given character is a space or tab character...
Definition: strtonum.h:36
int ParseTriple(const char *begin, const char *end, const char **endptr, T1 &v1, T2 &v2, T3 &v3)
Parse colon separated triple v1:v2[:v3].
Definition: strtonum.h:697
long atol(const char *p)
A faster implementation of atol(). See documentation of std::atol() for more information. This function will use base 10. Note that this function does not check for overflow.
Definition: strtonum.h:434
float atof(const char *nptr)
A faster implementation of atof(). Unlike std::atof(), this function returns float type...
Definition: strtonum.h:448
float strtof(const char *nptr, char **endptr)
A faster implementation of strtof(). See documentation of std::strtof() for more information. Note that this function does not check for overflow. Use strtof_check_range() to check for overflow. TODO: the current version does not support hex number TODO: the current version does not handle long decimals: you may only have up to 19 digits after the decimal point, and you cannot have too many digits before the decimal point either.
Definition: strtonum.h:268
bool isalpha(char c)
Inline implementation of isalpha(). Tests whether the given character is an alphabet letter...
Definition: strtonum.h:56
T Str2Type(const char *begin, const char *end)
Convenience function for converting string into type T.
Definition: strtonum.h:538
static T get(const char *begin, const char *end)
Convert a string into type T.
bool isdigit(char c)
Inline implementation of isdigit(). Tests whether the given character is a decimal digit...
Definition: strtonum.h:46
double strtod(const char *nptr, char **endptr)
A faster implementation of strtod(). See documentation of std::strtod() for more information. Note that this function does not check for overflow. Use strtod_check_range() to check for overflow. TODO: the current version does not support hex number TODO: the current version does not handle long decimals: you may only have up to 19 digits after the decimal point, and you cannot have too many digits before the decimal point either.
Definition: strtonum.h:303