libcxx/src/include/from_chars_floating_point.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
#define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H

// These headers are in the shared LLVM-libc header library.
#include "shared/fp_bits.h"
#include "shared/str_to_float.h"
#include "shared/str_to_integer.h"

#include <__assert>
#include <__config>
#include <cctype>
#include <charconv>
#include <concepts>
#include <limits>

// Included for the _Floating_type_traits class
#include "to_chars_floating_point.h"

_LIBCPP_BEGIN_NAMESPACE_STD

// Parses an infinity string.
// Valid strings are case insensitive and contain INF or INFINITY.
//
// - __first is the first argument to std::from_chars. It is only used to
//   compute the number of consumed characters stored in the result.
// - __last is the last argument of std::from_chars.
// - __ptr is the current position in the input string. This points beyond
//   the initial I character.
// - __negative whether a valid string represents -inf or +inf.
//
// When the string is invalid the result holds the value 0, a consumed count
// of 0 and errc::invalid_argument. When _Fp has no infinity the characters
// are still consumed, but the result holds errc::result_out_of_range.
template <floating_point _Fp>
__from_chars_result<_Fp>
__from_chars_floating_point_inf(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
  // The <cctype> functions have undefined behavior when their argument is
  // neither EOF nor representable as unsigned char. Plain char may be signed,
  // so convert to unsigned char before classifying.
  const auto __lower = [](char __c) { return std::tolower(static_cast<unsigned char>(__c)); };

  if (__last - __ptr < 2) [[unlikely]]
    return {_Fp{0}, 0, errc::invalid_argument};

  if (__lower(__ptr[0]) != 'n' || __lower(__ptr[1]) != 'f') [[unlikely]]
    return {_Fp{0}, 0, errc::invalid_argument};

  __ptr += 2;

  // At this point the result is valid and contains INF.
  // When the remaining part contains INITY this will be consumed. Otherwise
  // only INF is consumed. For example INFINITZ will consume INF and ignore
  // INITZ.

  if (__last - __ptr >= 5       //
      && __lower(__ptr[0]) == 'i' //
      && __lower(__ptr[1]) == 'n' //
      && __lower(__ptr[2]) == 'i' //
      && __lower(__ptr[3]) == 't' //
      && __lower(__ptr[4]) == 'y')
    __ptr += 5;

  if constexpr (numeric_limits<_Fp>::has_infinity) {
    if (__negative)
      return {-std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};

    return {std::numeric_limits<_Fp>::infinity(), __ptr - __first, std::errc{}};
  } else {
    return {_Fp{0}, __ptr - __first, errc::result_out_of_range};
  }
}

// Parses a nan string.
// Valid strings start with NAN (case insensitive), optionally followed by
// ( n-char-sequence_opt ), where the sequence consists of alphanumeric
// characters and underscores.
//
// - __first is the first argument to std::from_chars. It is only used to
//   compute the number of consumed characters stored in the result.
// - __last is the last argument of std::from_chars.
// - __ptr is the current position in the input string. This points beyond
//   the initial N character.
// - __negative whether a valid string represents -nan or +nan.
//
// When the string is invalid the result holds the value 0, a consumed count
// of 0 and errc::invalid_argument.
template <floating_point _Fp>
__from_chars_result<_Fp>
__from_chars_floating_point_nan(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
  // The <cctype> functions have undefined behavior when their argument is
  // neither EOF nor representable as unsigned char. Plain char may be signed,
  // so every character is converted to unsigned char before classification.
  if (__last - __ptr < 2) [[unlikely]]
    return {_Fp{0}, 0, errc::invalid_argument};

  if (std::tolower(static_cast<unsigned char>(__ptr[0])) != 'a' ||
      std::tolower(static_cast<unsigned char>(__ptr[1])) != 'n') [[unlikely]]
    return {_Fp{0}, 0, errc::invalid_argument};

  __ptr += 2;

  // At this point the result is valid and contains NAN. When the remaining
  // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
  // only NAN is consumed. For example NAN(abcd will consume NAN and ignore
  // (abcd.
  if (__last - __ptr >= 2 && __ptr[0] == '(') {
    // __offset 0 is the opening parenthesis, the scan starts right after it.
    size_t __offset = 1;
    do {
      if (__ptr[__offset] == ')') {
        // A complete ( n-char-sequence_opt ) was found, consume it including
        // the closing parenthesis.
        __ptr += __offset + 1;
        break;
      }
      // Any other character ends the scan without consuming the parenthesized
      // part.
      // NOTE(review): std::isalnum consults the global C locale; confirm this
      // is acceptable for the locale-independent from_chars.
      if (__ptr[__offset] != '_' && !std::isalnum(static_cast<unsigned char>(__ptr[__offset])))
        break;
      ++__offset;
    } while (__ptr + __offset != __last);
  }

  if (__negative)
    return {-std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};

  return {std::numeric_limits<_Fp>::quiet_NaN(), __ptr - __first, std::errc{}};
}

// Result of parsing the fractional constant (the digits and the optional
// radix point) of a floating-point literal.
template <class _Tp>
struct __fractional_constant_result {
  // Offset in the input where parsing stopped. The parsers overwrite the
  // default with the position of the first character they did not consume.
  size_t __offset = static_cast<size_t>(-1);
  // The digits accumulated as an integer, as far as they fit in _Tp.
  _Tp __mantissa = 0;
  // Exponent correction, in the units used by the parser that filled this
  // struct, accounting for fractional digits and for integral digits that
  // were dropped.
  int __exponent = 0;
  // Set when a non-zero digit was dropped since the mantissa was full.
  bool __truncated = false;
  // Set when at least one digit has been seen.
  bool __is_valid = false;
};

// Parses the hex constant part of the hexadecimal floating-point value.
// - __input start of buffer given to from_chars
// - __n the number of elements in the buffer
// - __offset where to start parsing. The input can have an optional sign, the
//   offset starts after this sign.
//
// The returned __exponent is a binary exponent: every hexadecimal digit
// contributes 4 bits. __is_valid is only set when at least one hexadecimal
// digit was parsed and __offset is the position of the first character that
// is not part of the constant.
template <class _Tp>
__fractional_constant_result<_Tp> __parse_fractional_hex_constant(const char* __input, size_t __n, size_t __offset) {
  __fractional_constant_result<_Tp> __result;

  // Below this threshold another hexadecimal digit always fits in _Tp.
  const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 16;
  bool __fraction                         = false;
  for (; __offset < __n; ++__offset) {
    // The <cctype> functions have undefined behavior when their argument is
    // neither EOF nor representable as unsigned char. Plain char may be
    // signed, so convert before classifying.
    const unsigned char __c = static_cast<unsigned char>(__input[__offset]);
    if (std::isxdigit(__c)) {
      __result.__is_valid = true;

      // __c is one of 0-9, a-f or A-F at this point.
      const uint32_t __digit = std::isdigit(__c) ? __c - '0' : std::tolower(__c) - 'a' + 10;

      if (__result.__mantissa < __mantissa_truncate_threshold) {
        __result.__mantissa = (__result.__mantissa * 16) + __digit;
        // A stored fractional digit scales the value down by 2^4.
        if (__fraction)
          __result.__exponent -= 4;
      } else {
        // The mantissa is full. The digit is dropped, remember whether any
        // non-zero bits were lost so the conversion can round properly.
        if (__digit > 0)
          __result.__truncated = true;
        // A dropped integral digit scales the value up by 2^4.
        if (!__fraction)
          __result.__exponent += 4;
      }
    } else if (__c == '.') {
      if (__fraction)
        break; // this means that __input[__offset] points to a second decimal point, ending the number.

      __fraction = true;
    } else
      break;
  }

  __result.__offset = __offset;
  return __result;
}

// Result of parsing the optional exponent part of a floating-point literal.
struct __exponent_result {
  // Offset in the input directly after the parsed exponent, or the unchanged
  // start offset when no exponent was found.
  size_t __offset = static_cast<size_t>(-1);
  // The value of the exponent, 0 when no exponent was found.
  int __value = 0;
  // Whether an exponent was found.
  bool __present = false;
};

// Parses the optional exponent part that starts at __input[__offset].
// - __input start of buffer given to from_chars
// - __n the number of elements in the buffer
// - __offset the position where the exponent marker is expected
// - __marker the lower case exponent marker, the callers in this file use 'e'
//   for decimal and 'p' for hexadecimal input
//
// When the exponent is not present the result of the struct contains
// __offset, 0, false. This allows using the results unconditionally, the
// __present is important for the scientific notation, where the value is
// mandatory.
__exponent_result __parse_exponent(const char* __input, size_t __n, size_t __offset, char __marker) {
  // An exponent always needs the marker and at least one digit.
  if (__offset + 1 >= __n)
    return {__offset, 0, false};

  // The <cctype> functions have undefined behavior when their argument is
  // neither EOF nor representable as unsigned char. Plain char may be signed,
  // so convert before classifying.
  if (std::tolower(static_cast<unsigned char>(__input[__offset])) != __marker)
    return {__offset, 0, false};

  // strtointeger would skip leading whitespace, which is not allowed here.
  if (std::isspace(static_cast<unsigned char>(__input[__offset + 1])))
    return {__offset, 0, false};

  const size_t __digits_offset = __offset + 1;
  LIBC_NAMESPACE::shared::StrToNumResult<int32_t> __e =
      LIBC_NAMESPACE::shared::strtointeger<int32_t>(__input + __digits_offset, 10, __n - __digits_offset);
  // __e.error contains the errno value, 0 or ERANGE these are not interesting.
  // If the number of characters parsed is 0 it means there was no number. In
  // that case the marker is not part of the number either, so the original
  // offset is returned.
  if (__e.parsed_len == 0)
    return {__offset, 0, false};

  return {__digits_offset + __e.parsed_len, __e.value, true};
}

// Adds the exponent obtained from the fractional constant to the explicitly
// written exponent and saturates the sum to the range
// [-__max_biased_exponent, __max_biased_exponent].
// The addition is done as int64 to avoid overflow.
int32_t __merge_exponents(int64_t __fractional, int64_t __exponent, int __max_biased_exponent) {
  const int64_t __upper  = __max_biased_exponent;
  const int64_t __lower  = -__max_biased_exponent;
  const int64_t __merged = __fractional + __exponent;

  const int64_t __saturated = __merged > __upper ? __upper : (__merged < __lower ? __lower : __merged);
  return static_cast<int32_t>(__saturated);
}

// Stores the floating-point value described by __mantissa and the biased
// __exponent in __result and returns the updated __result.
// - __mantissa the mantissa bits of the value
// - __exponent the biased exponent of the value
// - __negative whether the stored value should be negated
// - __result the result so far, its __ec may hold errc::result_out_of_range
template <class _Fp, class _Tp>
__from_chars_result<_Fp>
__calculate_result(_Tp __mantissa, int __exponent, bool __negative, __from_chars_result<_Fp> __result) {
  auto __bits = LIBC_NAMESPACE::shared::FPBits<_Fp>();
  __bits.set_mantissa(__mantissa);
  __bits.set_biased_exponent(__exponent);

  // C17 7.12.1/6
  // The result underflows if the magnitude of the mathematical result is so
  // small that the mathematical result cannot be represented, without
  // extraordinary roundoff error, in an object of the specified type.237) If
  // the result underflows, the function returns an implementation-defined
  // value whose magnitude is no greater than the smallest normalized positive
  // number in the specified type; if the integer expression math_errhandling
  // & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
  // implementation-defined; if the integer expression math_errhandling &
  // MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
  // exception is raised is implementation-defined.
  //
  // LLVM-LIBC sets ERANGE for subnormal values
  //
  // [charconv.from.chars]/1
  //   ... If the parsed value is not in the range representable by the type of
  //   value, value is unmodified and the member ec of the return value is
  //   equal to errc::result_out_of_range. ...
  //
  // A non-zero subnormal value is representable, so the ERANGE reported for
  // it is undone here.
  const bool __nonzero_subnormal = __bits.is_subnormal() && !__bits.is_zero();
  if (__nonzero_subnormal && __result.__ec == errc::result_out_of_range)
    __result.__ec = errc{};

  const _Fp __magnitude = __bits.get_val();
  __result.__value      = __negative ? -__magnitude : __magnitude;

  return __result;
}

// Implements from_chars for hexadecimal floating-point values.
// __first forwarded from from_chars
// __last forwarded from from_chars
// __ptr the start of the buffer to parse. This is after the optional sign character.
// __negative should the resulting value be negative?
//
// This function and __from_chars_floating_point_decimal are similar. However
// the similar parts are all in helper functions. So the amount of code
// duplication is minimal.
template <floating_point _Fp>
__from_chars_result<_Fp>
__from_chars_floating_point_hex(const char* const __first, const char* __last, const char* __ptr, bool __negative) {
  using _Storage = typename _Floating_type_traits<_Fp>::_Uint_type;

  const size_t __n = __last - __first;

  // Without a single hexadecimal digit there is nothing to convert.
  const auto __fractional = std::__parse_fractional_hex_constant<_Storage>(__first, __n, __ptr - __first);
  if (!__fractional.__is_valid)
    return {_Fp{0}, 0, errc::invalid_argument};

  // The binary exponent is optional, when it is absent its value is 0 and no
  // additional characters are consumed.
  const auto __parsed_exponent = std::__parse_exponent(__first, __n, __fractional.__offset, 'p');
  const ptrdiff_t __consumed   = __parsed_exponent.__offset;
  const int __exponent         = std::__merge_exponents(
      __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);

  __from_chars_result<_Fp> __result{_Fp{0}, __consumed, {}};
  LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
  // A zero mantissa keeps the all-zero expanded float, the conversion is
  // only done for other values.
  if (__fractional.__mantissa != 0) {
    const auto __converted = LIBC_NAMESPACE::shared::binary_exp_to_float<_Fp>(
        {__fractional.__mantissa, __exponent},
        __fractional.__truncated,
        LIBC_NAMESPACE::shared::RoundDirection::Nearest);
    __expanded_float = __converted.num;
    if (__converted.error == ERANGE)
      __result.__ec = errc::result_out_of_range;
  }

  return std::__calculate_result<_Fp>(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
}

// Parses the fractional constant part of the decimal floating-point value.
// - __input start of buffer given to from_chars
// - __n the number of elements in the buffer
// - __offset where to start parsing. The input can have an optional sign, the
//   offset starts after this sign.
//
// The returned __exponent is a decimal exponent. __is_valid is only set when
// at least one digit was parsed and __offset is the position of the first
// character that is not part of the constant.
template <class _Tp>
__fractional_constant_result<_Tp>
__parse_fractional_decimal_constant(const char* __input, ptrdiff_t __n, ptrdiff_t __offset) {
  __fractional_constant_result<_Tp> __result;

  // Below this threshold another decimal digit always fits in _Tp.
  const _Tp __mantissa_truncate_threshold = numeric_limits<_Tp>::max() / 10;
  bool __fraction                         = false;
  for (; __offset < __n; ++__offset) {
    // The <cctype> functions have undefined behavior when their argument is
    // neither EOF nor representable as unsigned char. Plain char may be
    // signed, so convert before classifying.
    const unsigned char __c = static_cast<unsigned char>(__input[__offset]);
    if (std::isdigit(__c)) {
      __result.__is_valid = true;

      const uint32_t __digit = __c - '0';
      if (__result.__mantissa < __mantissa_truncate_threshold) {
        __result.__mantissa = (__result.__mantissa * 10) + __digit;
        // A stored fractional digit scales the value down by 10.
        if (__fraction)
          --__result.__exponent;
      } else {
        // The mantissa is full. The digit is dropped, remember whether a
        // non-zero digit was lost so the conversion can round properly.
        if (__digit > 0)
          __result.__truncated = true;
        // A dropped integral digit scales the value up by 10.
        if (!__fraction)
          ++__result.__exponent;
      }
    } else if (__c == '.') {
      if (__fraction)
        break; // this means that __input[__offset] points to a second decimal point, ending the number.

      __fraction = true;
    } else
      break;
  }

  __result.__offset = __offset;
  return __result;
}

// Implements from_chars for decimal floating-point values.
// __first forwarded from from_chars
// __last forwarded from from_chars
// __fmt forwarded from from_chars
// __ptr the start of the buffer to parse. This is after the optional sign character.
// __negative should the resulting value be negative?
//
// When no digits are found, or when the exponent required by
// chars_format::scientific is missing, the result holds the value 0, a
// consumed count of 0 and errc::invalid_argument.
template <floating_point _Fp>
__from_chars_result<_Fp> __from_chars_floating_point_decimal(
    const char* const __first, const char* __last, chars_format __fmt, const char* __ptr, bool __negative) {
  ptrdiff_t __n      = __last - __first;
  ptrdiff_t __offset = __ptr - __first;

  auto __fractional =
      std::__parse_fractional_decimal_constant<typename _Floating_type_traits<_Fp>::_Uint_type>(__first, __n, __offset);
  if (!__fractional.__is_valid)
    return {_Fp{0}, 0, errc::invalid_argument};

  __offset = __fractional.__offset;

  // LWG3456 Pattern used by std::from_chars is underspecified
  // This changes fixed to ignore a possible exponent instead of making its
  // existence an error.
  int __exponent;
  if (__fmt == chars_format::fixed) {
    __exponent =
        std::__merge_exponents(__fractional.__exponent, 0, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
  } else {
    auto __parsed_exponent = std::__parse_exponent(__first, __n, __offset, 'e');
    if (__fmt == chars_format::scientific && !__parsed_exponent.__present) {
      // [charconv.from.chars]/6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
      // the otherwise optional exponent part shall appear;
      return {_Fp{0}, 0, errc::invalid_argument};
    }

    __offset   = __parsed_exponent.__offset;
    __exponent = std::__merge_exponents(
        __fractional.__exponent, __parsed_exponent.__value, LIBC_NAMESPACE::shared::FPBits<_Fp>::MAX_BIASED_EXPONENT);
  }

  __from_chars_result<_Fp> __result{_Fp{0}, __offset, {}};
  LIBC_NAMESPACE::shared::ExpandedFloat<_Fp> __expanded_float = {0, 0};
  if (__fractional.__mantissa != 0) {
    // This function expects to parse a positive value. This means it does not
    // take a __first, __n as arguments, since __first points to '-' for
    // negative values.
    //
    // Only the characters accepted above, [__ptr, __first + __offset), are
    // handed over. Passing everything up to __last would expose characters
    // that are not part of the parsed number, for example the exponent that
    // chars_format::fixed ignores or a dangling exponent marker, to code that
    // parses the string again.
    // NOTE(review): assumes the string is only used by decimal_exp_to_float
    // to re-parse the number when its fast paths fail; confirm against the
    // LLVM-libc implementation.
    auto __temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
        {__fractional.__mantissa, __exponent},
        __fractional.__truncated,
        LIBC_NAMESPACE::shared::RoundDirection::Nearest,
        __ptr,
        static_cast<size_t>(__first + __offset - __ptr));
    __expanded_float = __temp.num;
    if (__temp.error == ERANGE) {
      __result.__ec = errc::result_out_of_range;
    }
  }

  return std::__calculate_result(__expanded_float.mantissa, __expanded_float.exponent, __negative, __result);
}

// Entry point of the floating-point from_chars implementation.
// __first forwarded from from_chars
// __last forwarded from from_chars
// __fmt forwarded from from_chars
//
// Handles the empty input and the optional minus sign, then dispatches to the
// INF, NAN, hexadecimal, or decimal parser. When the input is invalid the
// result holds the value 0, a consumed count of 0 and errc::invalid_argument.
template <floating_point _Fp>
__from_chars_result<_Fp>
__from_chars_floating_point_impl(const char* const __first, const char* __last, chars_format __fmt) {
  if (__first == __last) [[unlikely]]
    return {_Fp{0}, 0, errc::invalid_argument};

  const char* __ptr = __first;
  bool __negative   = *__ptr == '-';
  if (__negative) {
    ++__ptr;
    // A lone minus sign is not a number.
    if (__ptr == __last) [[unlikely]]
      return {_Fp{0}, 0, errc::invalid_argument};
  }

  // [charconv.from.chars]
  //   [Note 1: If the pattern allows for an optional sign, but the string has
  //   no digit characters following the sign, no characters match the pattern.
  //   -- end note]
  // This is true for integrals, floating point allows -.0

  // [charconv.from.chars]/6.2
  //   if fmt has chars_format::scientific set but not chars_format::fixed, the
  //   otherwise optional exponent part shall appear;
  // Since INF/NAN do not have an exponent this value is not valid.
  //
  // LWG3456 Pattern used by std::from_chars is underspecified
  // Does not address this point, but proposed option B does solve this issue,
  // Both MSVC STL and libstdc++ implement this behaviour.
  //
  // std::tolower has undefined behavior when its argument is neither EOF nor
  // representable as unsigned char. Plain char may be signed, so convert
  // before calling it.
  switch (std::tolower(static_cast<unsigned char>(*__ptr))) {
  case 'i':
    return std::__from_chars_floating_point_inf<_Fp>(__first, __last, __ptr + 1, __negative);
  case 'n':
    if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
      // NOTE: The pointer passed here will be parsed in the default C locale.
      // This is standard behavior (see https://eel.is/c++draft/charconv.from.chars), but may be unexpected.
      return std::__from_chars_floating_point_nan<_Fp>(__first, __last, __ptr + 1, __negative);
    return {_Fp{0}, 0, errc::invalid_argument};
  }

  if (__fmt == chars_format::hex)
    return std::__from_chars_floating_point_hex<_Fp>(__first, __last, __ptr, __negative);

  return std::__from_chars_floating_point_decimal<_Fp>(__first, __last, __fmt, __ptr, __negative);
}

_LIBCPP_END_NAMESPACE_STD

#endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H