diff options
Diffstat (limited to 'libc/src')
| -rw-r--r-- | libc/src/__support/CMakeLists.txt | 12 | ||||
| -rw-r--r-- | libc/src/__support/str_to_integer.h | 98 | ||||
| -rw-r--r-- | libc/src/__support/wcs_to_integer.h | 155 | ||||
| -rw-r--r-- | libc/src/wchar/CMakeLists.txt | 8 | ||||
| -rw-r--r-- | libc/src/wchar/wcstol.cpp | 4 | ||||
| -rw-r--r-- | libc/src/wchar/wcstoll.cpp | 4 | ||||
| -rw-r--r-- | libc/src/wchar/wcstoul.cpp | 4 | ||||
| -rw-r--r-- | libc/src/wchar/wcstoull.cpp | 4 |
8 files changed, 81 insertions, 208 deletions
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 0ef09a9..b7af751 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -179,19 +179,7 @@ add_header_library( DEPENDS .ctype_utils .str_to_num_result - libc.hdr.errno_macros - libc.src.__support.CPP.limits - libc.src.__support.CPP.type_traits - libc.src.__support.common -) - -add_header_library( - wcs_to_integer - HDRS - wcs_to_integer.h - DEPENDS .wctype_utils - .str_to_num_result libc.hdr.errno_macros libc.src.__support.CPP.limits libc.src.__support.CPP.type_traits diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h index d332c92..ba3f49f 100644 --- a/libc/src/__support/str_to_integer.h +++ b/libc/src/__support/str_to_integer.h @@ -25,36 +25,63 @@ #include "src/__support/macros/config.h" #include "src/__support/str_to_num_result.h" #include "src/__support/uint128.h" +#include "src/__support/wctype_utils.h" namespace LIBC_NAMESPACE_DECL { namespace internal { // Returns the idx to the first character in src that is not a whitespace -// character (as determined by isspace()) +// character (as determined by isspace() / iswspace()) +template <typename CharType> LIBC_INLINE size_t -first_non_whitespace(const char *__restrict src, +first_non_whitespace(const CharType *__restrict src, size_t src_len = cpp::numeric_limits<size_t>::max()) { size_t src_cur = 0; - while (src_cur < src_len && internal::isspace(src[src_cur])) { + while (src_cur < src_len) { + if constexpr (cpp::is_same_v<CharType, char>) { + if (!internal::isspace(src[src_cur])) + break; + } else { + if (!internal::iswspace(src[src_cur])) + break; + } ++src_cur; } return src_cur; } +// Returns +1, -1, or 0 if 'src' starts with (respectively) +// plus sign, minus sign, or neither. +template <typename CharType> +LIBC_INLINE static int get_sign(const CharType *__restrict src) { + if constexpr (cpp::is_same_v<CharType, char>) { + return (src[0] == '+') ? 1 : (src[0] == '-' ? -1 : 0); + } else { + return (src[0] == L'+') ? 1 : (src[0] == L'-' ? -1 : 0); + } +} + // checks if the next 3 characters of the string pointer are the start of a // hexadecimal number. Does not advance the string pointer. -LIBC_INLINE bool -is_hex_start(const char *__restrict src, - size_t src_len = cpp::numeric_limits<size_t>::max()) { +template <typename CharType> +LIBC_INLINE static bool is_hex_start(const CharType *__restrict src, + size_t src_len) { if (src_len < 3) return false; - return *src == '0' && tolower(*(src + 1)) == 'x' && isalnum(*(src + 2)) && - b36_char_to_int(*(src + 2)) < 16; + if constexpr (cpp::is_same_v<CharType, char>) { + return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) && + b36_char_to_int(src[2]) < 16; + } else { + return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) && + b36_wchar_to_int(src[2]) < 16; + } } // Takes the address of the string pointer and parses the base from the start of // it. -LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) { +template <typename CharType> +LIBC_INLINE static int infer_base(const CharType *__restrict src, + size_t src_len) { // A hexadecimal number is defined as "the prefix 0x or 0X followed by a // sequence of the decimal digits and the letters a (or A) through f (or F) // with values 10 through 15 respectively." (C standard 6.4.4.1) @@ -63,8 +90,15 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) { // An octal number is defined as "the prefix 0 optionally followed by a // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any // number that starts with 0, including just 0, is an octal number. - if (src_len > 0 && src[0] == '0') - return 8; + if (src_len > 0) { + if constexpr (cpp::is_same_v<CharType, char>) { + if (src[0] == '0') + return 8; + } else { + if (src[0] == L'0') + return 8; + } + } // A decimal number is defined as beginning "with a nonzero digit and // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1) return 10; @@ -77,32 +111,27 @@ LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) { // ----------------------------------------------------------------------------- // Takes a pointer to a string and the base to convert to. This function is used // as the backend for all of the string to int functions. -template <class T> +template <typename T, typename CharType> LIBC_INLINE StrToNumResult<T> -strtointeger(const char *__restrict src, int base, +strtointeger(const CharType *__restrict src, int base, const size_t src_len = cpp::numeric_limits<size_t>::max()) { using ResultType = make_integral_or_big_int_unsigned_t<T>; - ResultType result = 0; - - bool is_number = false; - size_t src_cur = 0; - int error_val = 0; - if (src_len == 0) return {0, 0, 0}; if (base < 0 || base == 1 || base > 36) return {0, 0, EINVAL}; - src_cur = first_non_whitespace(src, src_len); - - char result_sign = '+'; - if (src[src_cur] == '+' || src[src_cur] == '-') { - result_sign = src[src_cur]; - ++src_cur; + size_t src_cur = first_non_whitespace(src, src_len); + if (src_cur == src_len) { + return {0, 0, 0}; } + int sign = get_sign(src + src_cur); + bool is_positive = (sign >= 0); + src_cur += (sign != 0); + if (base == 0) base = infer_base(src + src_cur, src_len - src_cur); @@ -110,8 +139,6 @@ strtointeger(const char *__restrict src, int base, src_cur = src_cur + 2; constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>; - const bool is_positive = (result_sign == '+'); - ResultType constexpr NEGATIVE_MAX = !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1 : cpp::numeric_limits<T>::max(); @@ -120,8 +147,21 @@ strtointeger(const char *__restrict src, int base, ResultType const abs_max_div_by_base = abs_max / static_cast<ResultType>(base); - while (src_cur < src_len && isalnum(src[src_cur])) { - int cur_digit = b36_char_to_int(src[src_cur]); + bool is_number = false; + int error_val = 0; + ResultType result = 0; + while (src_cur < src_len) { + int cur_digit; + if constexpr (cpp::is_same_v<CharType, char>) { + if (!isalnum(src[src_cur])) + break; + cur_digit = b36_char_to_int(src[src_cur]); + } else { + if (!iswalnum(src[src_cur])) + break; + cur_digit = b36_wchar_to_int(src[src_cur]); + } + if (cur_digit >= base) break; diff --git a/libc/src/__support/wcs_to_integer.h b/libc/src/__support/wcs_to_integer.h deleted file mode 100644 index 4254bd8..0000000 --- a/libc/src/__support/wcs_to_integer.h +++ /dev/null @@ -1,155 +0,0 @@ -//===-- Widechar string to integer conversion utils -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H -#define LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H - -#include "hdr/errno_macros.h" // For ERANGE -#include "src/__support/CPP/limits.h" -#include "src/__support/CPP/type_traits.h" -#include "src/__support/CPP/type_traits/make_unsigned.h" -#include "src/__support/big_int.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/str_to_num_result.h" -#include "src/__support/uint128.h" -#include "src/__support/wctype_utils.h" - -namespace LIBC_NAMESPACE_DECL { -namespace internal { - -// Returns the idx of the first character in src that is not a whitespace -// character (as determined by iswspace()) -LIBC_INLINE size_t -first_non_whitespace(const wchar_t *__restrict src, - size_t src_len = cpp::numeric_limits<size_t>::max()) { - size_t src_cur = 0; - while (src_cur < src_len && internal::iswspace(src[src_cur])) { - ++src_cur; - } - return src_cur; -} - -// checks if the next 3 characters of the string pointer are the start of a -// hexadecimal number. Does not advance the string pointer. -LIBC_INLINE bool -is_hex_start(const wchar_t *__restrict src, - size_t src_len = cpp::numeric_limits<size_t>::max()) { - if (src_len < 3) - return false; - return *src == L'0' && towlower(*(src + 1)) == L'x' && iswalnum(*(src + 2)) && - b36_wchar_to_int(*(src + 2)) < 16; -} - -// Takes the address of the string pointer and parses the base from the start of -// it. -LIBC_INLINE int infer_base(const wchar_t *__restrict src, size_t src_len) { - // A hexadecimal number is defined as "the prefix 0x or 0X followed by a - // sequence of the decimal digits and the letters a (or A) through f (or F) - // with values 10 through 15 respectively." (C standard 6.4.4.1) - if (is_hex_start(src, src_len)) - return 16; - // An octal number is defined as "the prefix 0 optionally followed by a - // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any - // number that starts with 0, including just 0, is an octal number. - if (src_len > 0 && src[0] == L'0') - return 8; - // A decimal number is defined as beginning "with a nonzero digit and - // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1) - return 10; -} - -template <class T> -LIBC_INLINE StrToNumResult<T> -wcstointeger(const wchar_t *__restrict src, int base, - const size_t src_len = cpp::numeric_limits<size_t>::max()) { - using ResultType = make_integral_or_big_int_unsigned_t<T>; - - ResultType result = 0; - - bool is_number = false; - size_t src_cur = 0; - int error_val = 0; - - if (src_len == 0) - return {0, 0, 0}; - - if (base < 0 || base == 1 || base > 36) - return {0, 0, EINVAL}; - - src_cur = first_non_whitespace(src, src_len); - - wchar_t result_sign = L'+'; - if (src[src_cur] == L'+' || src[src_cur] == L'-') { - result_sign = src[src_cur]; - ++src_cur; - } - - if (base == 0) - base = infer_base(src + src_cur, src_len - src_cur); - - if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur)) - src_cur = src_cur + 2; - - constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>; - const bool is_positive = (result_sign == L'+'); - - ResultType constexpr NEGATIVE_MAX = - !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1 - : cpp::numeric_limits<T>::max(); - ResultType const abs_max = - (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX); - ResultType const abs_max_div_by_base = - abs_max / static_cast<ResultType>(base); - - while (src_cur < src_len && iswalnum(src[src_cur])) { - int cur_digit = b36_wchar_to_int(src[src_cur]); - if (cur_digit >= base) - break; - - is_number = true; - ++src_cur; - - // If the number has already hit the maximum value for the current type then - // the result cannot change, but we still need to advance src to the end of - // the number. - if (result == abs_max) { - error_val = ERANGE; - continue; - } - - if (result > abs_max_div_by_base) { - result = abs_max; - error_val = ERANGE; - } else { - result = result * static_cast<ResultType>(base); - } - if (result > abs_max - static_cast<ResultType>(cur_digit)) { - result = abs_max; - error_val = ERANGE; - } else { - result = result + static_cast<ResultType>(cur_digit); - } - } - - ptrdiff_t str_len = is_number ? static_cast<ptrdiff_t>(src_cur) : 0; - - if (error_val == ERANGE) { - if (is_positive || IS_UNSIGNED) - return {cpp::numeric_limits<T>::max(), str_len, error_val}; - else // T is signed and there is a negative overflow - return {cpp::numeric_limits<T>::min(), str_len, error_val}; - } - - return {static_cast<T>(is_positive ? result : -result), str_len, error_val}; -} - -} // namespace internal -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index adde382..ba27cd7 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -63,7 +63,7 @@ add_entrypoint_object( wcstol.h DEPENDS libc.src.errno.errno - libc.src.__support.wcs_to_integer + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -74,7 +74,7 @@ add_entrypoint_object( wcstoll.h DEPENDS libc.src.errno.errno - libc.src.__support.wcs_to_integer + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -85,7 +85,7 @@ add_entrypoint_object( wcstoul.h DEPENDS libc.src.errno.errno - libc.src.__support.wcs_to_integer + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -96,7 +96,7 @@ add_entrypoint_object( wcstoull.h DEPENDS libc.src.errno.errno - libc.src.__support.wcs_to_integer + libc.src.__support.str_to_integer ) add_entrypoint_object( diff --git a/libc/src/wchar/wcstol.cpp b/libc/src/wchar/wcstol.cpp index a05718f..a56b5f9 100644 --- a/libc/src/wchar/wcstol.cpp +++ b/libc/src/wchar/wcstol.cpp @@ -10,14 +10,14 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/__support/wcs_to_integer.h" +#include "src/__support/str_to_integer.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(long, wcstol, (const wchar_t *__restrict str, wchar_t **__restrict str_end, int base)) { - auto result = internal::wcstointeger<long>(str, base); + auto result = internal::strtointeger<long>(str, base); if (result.has_error()) libc_errno = result.error; diff --git a/libc/src/wchar/wcstoll.cpp b/libc/src/wchar/wcstoll.cpp index de1299d..6229d24 100644 --- a/libc/src/wchar/wcstoll.cpp +++ b/libc/src/wchar/wcstoll.cpp @@ -10,14 +10,14 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/__support/wcs_to_integer.h" +#include "src/__support/str_to_integer.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(long long, wcstoll, (const wchar_t *__restrict str, wchar_t **__restrict str_end, int base)) { - auto result = internal::wcstointeger<long long>(str, base); + auto result = internal::strtointeger<long long>(str, base); if (result.has_error()) libc_errno = result.error; diff --git a/libc/src/wchar/wcstoul.cpp b/libc/src/wchar/wcstoul.cpp index 79b8c9b..c5639be 100644 --- a/libc/src/wchar/wcstoul.cpp +++ b/libc/src/wchar/wcstoul.cpp @@ -10,14 +10,14 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/__support/wcs_to_integer.h" +#include "src/__support/str_to_integer.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(unsigned long, wcstoul, (const wchar_t *__restrict str, wchar_t **__restrict str_end, int base)) { - auto result = internal::wcstointeger<unsigned long>(str, base); + auto result = internal::strtointeger<unsigned long>(str, base); if (result.has_error()) libc_errno = result.error; diff --git a/libc/src/wchar/wcstoull.cpp b/libc/src/wchar/wcstoull.cpp index 768e03c..2ab24e9 100644 --- a/libc/src/wchar/wcstoull.cpp +++ b/libc/src/wchar/wcstoull.cpp @@ -10,14 +10,14 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/__support/wcs_to_integer.h" +#include "src/__support/str_to_integer.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(unsigned long long, wcstoull, (const wchar_t *__restrict str, wchar_t **__restrict str_end, int base)) { - auto result = internal::wcstointeger<unsigned long long>(str, base); + auto result = internal::strtointeger<unsigned long long>(str, base); if (result.has_error()) libc_errno = result.error; |
