diff options
Diffstat (limited to 'libc/src')
64 files changed, 817 insertions, 911 deletions
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 4e90aad..5090dc2 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -267,7 +267,9 @@ add_header_library( HDRS fixedvector.h DEPENDS + .libc_assert libc.src.__support.CPP.array + libc.src.string.memory_utils.inline_memset ) add_header_library( diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index 972249f..528542c 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -42,7 +42,7 @@ FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) { if (pos > 0) { // If the buffer is not empty // Flush the buffer const size_t write_size = pos; - auto write_result = platform_write(this, buf, write_size); + FileIOResult write_result = platform_write(this, buf, write_size); pos = 0; // Buffer is now empty so reset pos to the beginning. // If less bytes were written than expected, then an error occurred. if (write_result < write_size) { @@ -52,7 +52,7 @@ FileIOResult File::write_unlocked_nbf(const uint8_t *data, size_t len) { } } - auto write_result = platform_write(this, data, len); + FileIOResult write_result = platform_write(this, data, len); if (write_result < len) err = true; return write_result; @@ -99,7 +99,7 @@ FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) { // is full. const size_t write_size = pos; - auto buf_result = platform_write(this, buf, write_size); + FileIOResult buf_result = platform_write(this, buf, write_size); size_t bytes_written = buf_result.value; pos = 0; // Buffer is now empty so reset pos to the beginning. @@ -121,7 +121,8 @@ FileIOResult File::write_unlocked_fbf(const uint8_t *data, size_t len) { pos = remainder.size(); } else { - auto result = platform_write(this, remainder.data(), remainder.size()); + FileIOResult result = + platform_write(this, remainder.data(), remainder.size()); size_t bytes_written = buf_result.value; // If less bytes were written than expected, then an error occurred. Return @@ -190,6 +191,17 @@ FileIOResult File::read_unlocked(void *data, size_t len) { prev_op = FileOp::READ; + if (bufmode == _IONBF) { // unbuffered. + return read_unlocked_nbf(static_cast<uint8_t *>(data), len); + } else if (bufmode == _IOFBF) { // fully buffered + return read_unlocked_fbf(static_cast<uint8_t *>(data), len); + } else /*if (bufmode == _IOLBF) */ { // line buffered + // There is no line buffered mode for read. Use fully buffered instead. + return read_unlocked_fbf(static_cast<uint8_t *>(data), len); + } +} + +size_t File::copy_data_from_buf(uint8_t *data, size_t len) { cpp::span<uint8_t> bufref(static_cast<uint8_t *>(buf), bufsize); cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data), len); @@ -209,32 +221,42 @@ FileIOResult File::read_unlocked(void *data, size_t len) { for (size_t i = 0; i < available_data; ++i) dataref[i] = bufref[i + pos]; read_limit = pos = 0; // Reset the pointers. + + return available_data; +} + +FileIOResult File::read_unlocked_fbf(uint8_t *data, size_t len) { + // Read data from the buffer first. + size_t available_data = copy_data_from_buf(data, len); + if (available_data == len) + return available_data; + // Update the dataref to reflect that fact that we have already // copied |available_data| into |data|. - dataref = cpp::span<uint8_t>(dataref.data() + available_data, - dataref.size() - available_data); - size_t to_fetch = len - available_data; + cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data, + to_fetch); + if (to_fetch > bufsize) { - auto result = platform_read(this, dataref.data(), to_fetch); + FileIOResult result = platform_read(this, dataref.data(), to_fetch); size_t fetched_size = result.value; if (result.has_error() || fetched_size < to_fetch) { if (!result.has_error()) eof = true; else err = true; - return {available_data + fetched_size, result.has_error()}; + return {available_data + fetched_size, result.error}; } return len; } // Fetch and buffer another buffer worth of data. - auto result = platform_read(this, buf, bufsize); + FileIOResult result = platform_read(this, buf, bufsize); size_t fetched_size = result.value; read_limit += fetched_size; size_t transfer_size = fetched_size >= to_fetch ? to_fetch : fetched_size; for (size_t i = 0; i < transfer_size; ++i) - dataref[i] = bufref[i]; + dataref[i] = buf[i]; pos += transfer_size; if (result.has_error() || fetched_size < to_fetch) { if (!result.has_error()) @@ -245,6 +267,26 @@ FileIOResult File::read_unlocked(void *data, size_t len) { return {transfer_size + available_data, result.error}; } +FileIOResult File::read_unlocked_nbf(uint8_t *data, size_t len) { + // Check whether there is a character in the ungetc buffer. + size_t available_data = copy_data_from_buf(data, len); + if (available_data == len) + return available_data; + + // Directly copy the data into |data|. + cpp::span<uint8_t> dataref(static_cast<uint8_t *>(data) + available_data, + len - available_data); + FileIOResult result = platform_read(this, dataref.data(), dataref.size()); + + if (result.has_error() || result < dataref.size()) { + if (!result.has_error()) + eof = true; + else + err = true; + } + return {result + available_data, result.error}; +} + int File::ungetc_unlocked(int c) { // There is no meaning to unget if: // 1. You are trying to push back EOF. @@ -287,7 +329,7 @@ ErrorOr<int> File::seek(off_t offset, int whence) { FileLock lock(this); if (prev_op == FileOp::WRITE && pos > 0) { - auto buf_result = platform_write(this, buf, pos); + FileIOResult buf_result = platform_write(this, buf, pos); if (buf_result.has_error() || buf_result.value < pos) { err = true; return Error(buf_result.error); @@ -325,7 +367,7 @@ ErrorOr<off_t> File::tell() { int File::flush_unlocked() { if (prev_op == FileOp::WRITE && pos > 0) { - auto buf_result = platform_write(this, buf, pos); + FileIOResult buf_result = platform_write(this, buf, pos); if (buf_result.has_error() || buf_result.value < pos) { err = true; return buf_result.error; diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index 42e1d11..5c97a9c 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -280,6 +280,10 @@ private: FileIOResult write_unlocked_fbf(const uint8_t *data, size_t len); FileIOResult write_unlocked_nbf(const uint8_t *data, size_t len); + FileIOResult read_unlocked_fbf(uint8_t *data, size_t len); + FileIOResult read_unlocked_nbf(uint8_t *data, size_t len); + size_t copy_data_from_buf(uint8_t *data, size_t len); + constexpr void adjust_buf() { if (read_allowed() && (buf == nullptr || bufsize == 0)) { // We should allow atleast one ungetc operation. diff --git a/libc/src/__support/GPU/CMakeLists.txt b/libc/src/__support/GPU/CMakeLists.txt index 28fd9a1..9b359f6 100644 --- a/libc/src/__support/GPU/CMakeLists.txt +++ b/libc/src/__support/GPU/CMakeLists.txt @@ -1,16 +1,12 @@ -if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE}) +# These utilities are GPU only. +if(NOT LIBC_TARGET_OS_IS_GPU) return() endif() -add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) -set(target_gpu_utils libc.src.__support.GPU.${LIBC_TARGET_ARCHITECTURE}.${LIBC_TARGET_ARCHITECTURE}_utils) - add_header_library( utils HDRS utils.h - DEPENDS - ${target_gpu_utils} ) add_object_library( @@ -21,6 +17,6 @@ add_object_library( allocator.h DEPENDS libc.src.__support.common - libc.src.__support.GPU.utils libc.src.__support.RPC.rpc_client + .utils ) diff --git a/libc/src/__support/GPU/amdgpu/CMakeLists.txt b/libc/src/__support/GPU/amdgpu/CMakeLists.txt deleted file mode 100644 index f2b98fc..0000000 --- a/libc/src/__support/GPU/amdgpu/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_header_library( - amdgpu_utils - HDRS - utils.h - DEPENDS - libc.src.__support.common -) diff --git a/libc/src/__support/GPU/amdgpu/utils.h b/libc/src/__support/GPU/amdgpu/utils.h deleted file mode 100644 index 6ab9540..0000000 --- a/libc/src/__support/GPU/amdgpu/utils.h +++ /dev/null @@ -1,183 +0,0 @@ -//===-------------- AMDGPU implementation of GPU utils ----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_AMDGPU_IO_H -#define LLVM_LIBC_SRC___SUPPORT_GPU_AMDGPU_IO_H - -#include "src/__support/common.h" -#include "src/__support/macros/config.h" - -#include <stdint.h> - -namespace LIBC_NAMESPACE_DECL { -namespace gpu { - -/// Type aliases to the address spaces used by the AMDGPU backend. -template <typename T> using Private = [[clang::opencl_private]] T; -template <typename T> using Constant = [[clang::opencl_constant]] T; -template <typename T> using Local = [[clang::opencl_local]] T; -template <typename T> using Global = [[clang::opencl_global]] T; - -/// Returns the number of workgroups in the 'x' dimension of the grid. -LIBC_INLINE uint32_t get_num_blocks_x() { - return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); -} - -/// Returns the number of workgroups in the 'y' dimension of the grid. -LIBC_INLINE uint32_t get_num_blocks_y() { - return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y(); -} - -/// Returns the number of workgroups in the 'z' dimension of the grid. -LIBC_INLINE uint32_t get_num_blocks_z() { - return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z(); -} - -/// Returns the total number of workgruops in the grid. -LIBC_INLINE uint64_t get_num_blocks() { - return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); -} - -/// Returns the 'x' dimension of the current AMD workgroup's id. -LIBC_INLINE uint32_t get_block_id_x() { - return __builtin_amdgcn_workgroup_id_x(); -} - -/// Returns the 'y' dimension of the current AMD workgroup's id. -LIBC_INLINE uint32_t get_block_id_y() { - return __builtin_amdgcn_workgroup_id_y(); -} - -/// Returns the 'z' dimension of the current AMD workgroup's id. -LIBC_INLINE uint32_t get_block_id_z() { - return __builtin_amdgcn_workgroup_id_z(); -} - -/// Returns the absolute id of the AMD workgroup. -LIBC_INLINE uint64_t get_block_id() { - return get_block_id_x() + get_num_blocks_x() * get_block_id_y() + - get_num_blocks_x() * get_num_blocks_y() * get_block_id_z(); -} - -/// Returns the number of workitems in the 'x' dimension. -LIBC_INLINE uint32_t get_num_threads_x() { - return __builtin_amdgcn_workgroup_size_x(); -} - -/// Returns the number of workitems in the 'y' dimension. -LIBC_INLINE uint32_t get_num_threads_y() { - return __builtin_amdgcn_workgroup_size_y(); -} - -/// Returns the number of workitems in the 'z' dimension. -LIBC_INLINE uint32_t get_num_threads_z() { - return __builtin_amdgcn_workgroup_size_z(); -} - -/// Returns the total number of workitems in the workgroup. -LIBC_INLINE uint64_t get_num_threads() { - return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); -} - -/// Returns the 'x' dimension id of the workitem in the current AMD workgroup. -LIBC_INLINE uint32_t get_thread_id_x() { - return __builtin_amdgcn_workitem_id_x(); -} - -/// Returns the 'y' dimension id of the workitem in the current AMD workgroup. -LIBC_INLINE uint32_t get_thread_id_y() { - return __builtin_amdgcn_workitem_id_y(); -} - -/// Returns the 'z' dimension id of the workitem in the current AMD workgroup. -LIBC_INLINE uint32_t get_thread_id_z() { - return __builtin_amdgcn_workitem_id_z(); -} - -/// Returns the absolute id of the thread in the current AMD workgroup. -LIBC_INLINE uint64_t get_thread_id() { - return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() + - get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); -} - -/// Returns the size of an AMD wavefront, either 32 or 64 depending on hardware -/// and compilation options. -LIBC_INLINE uint32_t get_lane_size() { - return __builtin_amdgcn_wavefrontsize(); -} - -/// Returns the id of the thread inside of an AMD wavefront executing together. -[[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() { - return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); -} - -/// Returns the bit-mask of active threads in the current wavefront. -[[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() { - return __builtin_amdgcn_read_exec(); -} - -/// Copies the value from the first active thread in the wavefront to the rest. -[[clang::convergent]] LIBC_INLINE uint32_t broadcast_value(uint64_t, - uint32_t x) { - return __builtin_amdgcn_readfirstlane(x); -} - -/// Returns a bitmask of threads in the current lane for which \p x is true. -[[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { - // the lane_mask & gives the nvptx semantics when lane_mask is a subset of - // the active threads - return lane_mask & __builtin_amdgcn_ballot_w64(x); -} - -/// Waits for all the threads in the block to converge and issues a fence. -[[clang::convergent]] LIBC_INLINE void sync_threads() { - __builtin_amdgcn_s_barrier(); - __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup"); -} - -/// Waits for all pending memory operations to complete in program order. -[[clang::convergent]] LIBC_INLINE void memory_fence() { - __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, ""); -} - -/// Wait for all threads in the wavefront to converge, this is a noop on AMDGPU. -[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t) { - __builtin_amdgcn_wave_barrier(); -} - -/// Shuffles the the lanes inside the wavefront according to the given index. -[[clang::convergent]] LIBC_INLINE uint32_t shuffle(uint64_t, uint32_t idx, - uint32_t x) { - return __builtin_amdgcn_ds_bpermute(idx << 2, x); -} - -/// Returns the current value of the GPU's processor clock. -/// NOTE: The RDNA3 and RDNA2 architectures use a 20-bit cycle counter. -LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } - -/// Returns a fixed-frequency timestamp. The actual frequency is dependent on -/// the card and can only be queried via the driver. -LIBC_INLINE uint64_t fixed_frequency_clock() { - return __builtin_readsteadycounter(); -} - -/// Terminates execution of the associated wavefront. -[[noreturn]] LIBC_INLINE void end_program() { __builtin_amdgcn_endpgm(); } - -/// Returns a unique identifier for the process cluster the current wavefront is -/// executing on. Here we use the identifier for the compute unit (CU) and -/// shader engine. -/// FIXME: Currently unimplemented on AMDGPU until we have a simpler interface -/// than the one at -/// https://github.com/ROCm/clr/blob/develop/hipamd/include/hip/amd_detail/amd_device_functions.h#L899 -LIBC_INLINE uint32_t get_cluster_id() { return 0; } - -} // namespace gpu -} // namespace LIBC_NAMESPACE_DECL - -#endif diff --git a/libc/src/__support/GPU/generic/CMakeLists.txt b/libc/src/__support/GPU/generic/CMakeLists.txt deleted file mode 100644 index 68ba7d1..0000000 --- a/libc/src/__support/GPU/generic/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_header_library( - generic_utils - HDRS - utils.h - DEPENDS - libc.src.__support.common -) diff --git a/libc/src/__support/GPU/generic/utils.h b/libc/src/__support/GPU/generic/utils.h deleted file mode 100644 index 9461ef0..0000000 --- a/libc/src/__support/GPU/generic/utils.h +++ /dev/null @@ -1,84 +0,0 @@ -//===-------------- Generic implementation of GPU utils ---------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_GENERIC_UTILS_H -#define LLVM_LIBC_SRC___SUPPORT_GPU_GENERIC_UTILS_H - -#include "src/__support/common.h" -#include "src/__support/macros/config.h" - -#include <stdint.h> - -namespace LIBC_NAMESPACE_DECL { -namespace gpu { - -template <typename T> using Private = T; -template <typename T> using Constant = T; -template <typename T> using Shared = T; -template <typename T> using Global = T; - -LIBC_INLINE uint32_t get_num_blocks_x() { return 1; } - -LIBC_INLINE uint32_t get_num_blocks_y() { return 1; } - -LIBC_INLINE uint32_t get_num_blocks_z() { return 1; } - -LIBC_INLINE uint64_t get_num_blocks() { return 1; } - -LIBC_INLINE uint32_t get_block_id_x() { return 0; } - -LIBC_INLINE uint32_t get_block_id_y() { return 0; } - -LIBC_INLINE uint32_t get_block_id_z() { return 0; } - -LIBC_INLINE uint64_t get_block_id() { return 0; } - -LIBC_INLINE uint32_t get_num_threads_x() { return 1; } - -LIBC_INLINE uint32_t get_num_threads_y() { return 1; } - -LIBC_INLINE uint32_t get_num_threads_z() { return 1; } - -LIBC_INLINE uint64_t get_num_threads() { return 1; } - -LIBC_INLINE uint32_t get_thread_id_x() { return 0; } - -LIBC_INLINE uint32_t get_thread_id_y() { return 0; } - -LIBC_INLINE uint32_t get_thread_id_z() { return 0; } - -LIBC_INLINE uint64_t get_thread_id() { return 0; } - -LIBC_INLINE uint32_t get_lane_size() { return 1; } - -LIBC_INLINE uint32_t get_lane_id() { return 0; } - -LIBC_INLINE uint64_t get_lane_mask() { return 1; } - -LIBC_INLINE uint32_t broadcast_value(uint64_t, uint32_t x) { return x; } - -LIBC_INLINE uint64_t ballot(uint64_t, bool x) { return x; } - -LIBC_INLINE void sync_threads() {} - -LIBC_INLINE void sync_lane(uint64_t) {} - -LIBC_INLINE uint32_t shuffle(uint64_t, uint32_t, uint32_t x) { return x; } - -LIBC_INLINE uint64_t processor_clock() { return 0; } - -LIBC_INLINE uint64_t fixed_frequency_clock() { return 0; } - -[[noreturn]] LIBC_INLINE void end_program() { __builtin_unreachable(); } - -LIBC_INLINE uint32_t get_cluster_id() { return 0; } - -} // namespace gpu -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC___SUPPORT_GPU_GENERIC_UTILS_H diff --git a/libc/src/__support/GPU/nvptx/CMakeLists.txt b/libc/src/__support/GPU/nvptx/CMakeLists.txt deleted file mode 100644 index 0d3f8c7..0000000 --- a/libc/src/__support/GPU/nvptx/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_header_library( - nvptx_utils - HDRS - utils.h - DEPENDS - libc.src.__support.common -) diff --git a/libc/src/__support/GPU/nvptx/utils.h b/libc/src/__support/GPU/nvptx/utils.h deleted file mode 100644 index 1a43a83..0000000 --- a/libc/src/__support/GPU/nvptx/utils.h +++ /dev/null @@ -1,160 +0,0 @@ -//===-------------- NVPTX implementation of GPU utils -----------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-id: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_NVPTX_IO_H -#define LLVM_LIBC_SRC___SUPPORT_GPU_NVPTX_IO_H - -#include "src/__support/common.h" -#include "src/__support/macros/config.h" - -#include <stdint.h> - -namespace LIBC_NAMESPACE_DECL { -namespace gpu { - -/// Type aliases to the address spaces used by the NVPTX backend. -template <typename T> using Private = [[clang::opencl_private]] T; -template <typename T> using Constant = [[clang::opencl_constant]] T; -template <typename T> using Local = [[clang::opencl_local]] T; -template <typename T> using Global = [[clang::opencl_global]] T; - -/// Returns the number of CUDA blocks in the 'x' dimension. -LIBC_INLINE uint32_t get_num_blocks_x() { - return __nvvm_read_ptx_sreg_nctaid_x(); -} - -/// Returns the number of CUDA blocks in the 'y' dimension. -LIBC_INLINE uint32_t get_num_blocks_y() { - return __nvvm_read_ptx_sreg_nctaid_y(); -} - -/// Returns the number of CUDA blocks in the 'z' dimension. -LIBC_INLINE uint32_t get_num_blocks_z() { - return __nvvm_read_ptx_sreg_nctaid_z(); -} - -/// Returns the total number of CUDA blocks. -LIBC_INLINE uint64_t get_num_blocks() { - return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); -} - -/// Returns the 'x' dimension of the current CUDA block's id. -LIBC_INLINE uint32_t get_block_id_x() { return __nvvm_read_ptx_sreg_ctaid_x(); } - -/// Returns the 'y' dimension of the current CUDA block's id. -LIBC_INLINE uint32_t get_block_id_y() { return __nvvm_read_ptx_sreg_ctaid_y(); } - -/// Returns the 'z' dimension of the current CUDA block's id. -LIBC_INLINE uint32_t get_block_id_z() { return __nvvm_read_ptx_sreg_ctaid_z(); } - -/// Returns the absolute id of the CUDA block. -LIBC_INLINE uint64_t get_block_id() { - return get_block_id_x() + get_num_blocks_x() * get_block_id_y() + - get_num_blocks_x() * get_num_blocks_y() * get_block_id_z(); -} - -/// Returns the number of CUDA threads in the 'x' dimension. -LIBC_INLINE uint32_t get_num_threads_x() { - return __nvvm_read_ptx_sreg_ntid_x(); -} - -/// Returns the number of CUDA threads in the 'y' dimension. -LIBC_INLINE uint32_t get_num_threads_y() { - return __nvvm_read_ptx_sreg_ntid_y(); -} - -/// Returns the number of CUDA threads in the 'z' dimension. -LIBC_INLINE uint32_t get_num_threads_z() { - return __nvvm_read_ptx_sreg_ntid_z(); -} - -/// Returns the total number of threads in the block. -LIBC_INLINE uint64_t get_num_threads() { - return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); -} - -/// Returns the 'x' dimension id of the thread in the current CUDA block. -LIBC_INLINE uint32_t get_thread_id_x() { return __nvvm_read_ptx_sreg_tid_x(); } - -/// Returns the 'y' dimension id of the thread in the current CUDA block. -LIBC_INLINE uint32_t get_thread_id_y() { return __nvvm_read_ptx_sreg_tid_y(); } - -/// Returns the 'z' dimension id of the thread in the current CUDA block. -LIBC_INLINE uint32_t get_thread_id_z() { return __nvvm_read_ptx_sreg_tid_z(); } - -/// Returns the absolute id of the thread in the current CUDA block. -LIBC_INLINE uint64_t get_thread_id() { - return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() + - get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); -} - -/// Returns the size of a CUDA warp, always 32 on NVIDIA hardware. -LIBC_INLINE uint32_t get_lane_size() { return 32; } - -/// Returns the id of the thread inside of a CUDA warp executing together. -[[clang::convergent]] LIBC_INLINE uint32_t get_lane_id() { - return __nvvm_read_ptx_sreg_laneid(); -} - -/// Returns the bit-mask of active threads in the current warp. -[[clang::convergent]] LIBC_INLINE uint64_t get_lane_mask() { - return __nvvm_activemask(); -} - -/// Copies the value from the first active thread in the warp to the rest. -[[clang::convergent]] LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, - uint32_t x) { - uint32_t mask = static_cast<uint32_t>(lane_mask); - uint32_t id = __builtin_ffs(mask) - 1; - return __nvvm_shfl_sync_idx_i32(mask, x, id, get_lane_size() - 1); -} - -/// Returns a bitmask of threads in the current lane for which \p x is true. -[[clang::convergent]] LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { - uint32_t mask = static_cast<uint32_t>(lane_mask); - return __nvvm_vote_ballot_sync(mask, x); -} - -/// Waits for all the threads in the block to converge and issues a fence. -[[clang::convergent]] LIBC_INLINE void sync_threads() { __syncthreads(); } - -/// Waits for all pending memory operations to complete in program order. -[[clang::convergent]] LIBC_INLINE void memory_fence() { __nvvm_membar_sys(); } - -/// Waits for all threads in the warp to reconverge for independent scheduling. -[[clang::convergent]] LIBC_INLINE void sync_lane(uint64_t mask) { - __nvvm_bar_warp_sync(static_cast<uint32_t>(mask)); -} - -/// Shuffles the the lanes inside the warp according to the given index. -[[clang::convergent]] LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, - uint32_t idx, uint32_t x) { - uint32_t mask = static_cast<uint32_t>(lane_mask); - uint32_t bitmask = (mask >> idx) & 1; - return -bitmask & __nvvm_shfl_sync_idx_i32(mask, x, idx, get_lane_size() - 1); -} - -/// Returns the current value of the GPU's processor clock. -LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } - -/// Returns a global fixed-frequency timer at nanosecond frequency. -LIBC_INLINE uint64_t fixed_frequency_clock() { - return __builtin_readsteadycounter(); -} - -/// Terminates execution of the calling thread. -[[noreturn]] LIBC_INLINE void end_program() { __nvvm_exit(); } - -/// Returns a unique identifier for the process cluster the current warp is -/// executing on. Here we use the identifier for the symmetric multiprocessor. -LIBC_INLINE uint32_t get_cluster_id() { return __nvvm_read_ptx_sreg_smid(); } - -} // namespace gpu -} // namespace LIBC_NAMESPACE_DECL - -#endif diff --git a/libc/src/__support/GPU/utils.h b/libc/src/__support/GPU/utils.h index ae52e7a..e138c84 100644 --- a/libc/src/__support/GPU/utils.h +++ b/libc/src/__support/GPU/utils.h @@ -9,48 +9,108 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H +#include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" #include "src/__support/macros/properties/architectures.h" -#if defined(LIBC_TARGET_ARCH_IS_AMDGPU) -#include "amdgpu/utils.h" -#elif defined(LIBC_TARGET_ARCH_IS_NVPTX) -#include "nvptx/utils.h" -#else -#include "generic/utils.h" +#if !__has_include(<gpuintrin.h>) +#error "Unsupported compiler" #endif +#include <gpuintrin.h> + namespace LIBC_NAMESPACE_DECL { namespace gpu { -/// Get the first active thread inside the lane. -LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) { - return __builtin_ffsll(lane_mask) - 1; + +template <typename T> using Private = __gpu_private T; +template <typename T> using Constant = __gpu_constant T; +template <typename T> using Local = __gpu_local T; +template <typename T> using Global = __gpu_local T; + +LIBC_INLINE uint32_t get_num_blocks_x() { return __gpu_num_blocks(0); } + +LIBC_INLINE uint32_t get_num_blocks_y() { return __gpu_num_blocks(1); } + +LIBC_INLINE uint32_t get_num_blocks_z() { return __gpu_num_blocks(2); } + +LIBC_INLINE uint64_t get_num_blocks() { + return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); +} + +LIBC_INLINE uint32_t get_block_id_x() { return __gpu_block_id(0); } + +LIBC_INLINE uint32_t get_block_id_y() { return __gpu_block_id(1); } + +LIBC_INLINE uint32_t get_block_id_z() { return __gpu_block_id(2); } + +LIBC_INLINE uint64_t get_block_id() { + return get_block_id_x() + get_num_blocks_x() * get_block_id_y() + + get_num_blocks_x() * get_num_blocks_y() * get_block_id_z(); +} + +LIBC_INLINE uint32_t get_num_threads_x() { return __gpu_num_threads(0); } + +LIBC_INLINE uint32_t get_num_threads_y() { return __gpu_num_threads(1); } + +LIBC_INLINE uint32_t get_num_threads_z() { return __gpu_num_threads(2); } + +LIBC_INLINE uint64_t get_num_threads() { + return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); +} + +LIBC_INLINE uint32_t get_thread_id_x() { return __gpu_thread_id(0); } + +LIBC_INLINE uint32_t get_thread_id_y() { return __gpu_thread_id(1); } + +LIBC_INLINE uint32_t get_thread_id_z() { return __gpu_thread_id(2); } + +LIBC_INLINE uint64_t get_thread_id() { + return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() + + get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); +} + +LIBC_INLINE uint32_t get_lane_size() { return __gpu_num_lanes(); } + +LIBC_INLINE uint32_t get_lane_id() { return __gpu_lane_id(); } + +LIBC_INLINE uint64_t get_lane_mask() { return __gpu_lane_mask(); } + +LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) { + return __gpu_read_first_lane_u32(lane_mask, x); +} + +LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { + return __gpu_ballot(lane_mask, x); +} + +LIBC_INLINE void sync_threads() { __gpu_sync_threads(); } + +LIBC_INLINE void sync_lane(uint64_t lane_mask) { __gpu_sync_lane(lane_mask); } + +LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x) { + return __gpu_shuffle_idx_u32(lane_mask, idx, x); } -/// Conditional that is only true for a single thread in a lane. +[[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); } + LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { - return gpu::get_lane_id() == get_first_lane_id(lane_mask); + return __gpu_is_first_in_lane(lane_mask); } -/// Gets the sum of all lanes inside the warp or wavefront. LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) { - for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) { - uint32_t index = step + gpu::get_lane_id(); - x += gpu::shuffle(lane_mask, index, x); - } - return gpu::broadcast_value(lane_mask, x); + return __gpu_lane_sum_u32(lane_mask, x); } -/// Gets the accumulator scan of the threads in the warp or wavefront. LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) { - for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) { - uint32_t index = gpu::get_lane_id() - step; - uint32_t bitmask = gpu::get_lane_id() >= step; - x += -bitmask & gpu::shuffle(lane_mask, index, x); - } - return x; + return __gpu_lane_scan_u32(lane_mask, x); +} + +LIBC_INLINE uint64_t fixed_frequency_clock() { + return __builtin_readsteadycounter(); } +LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } + } // namespace gpu } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/fixedvector.h b/libc/src/__support/fixedvector.h index 7ac0c23..34601f8 100644 --- a/libc/src/__support/fixedvector.h +++ b/libc/src/__support/fixedvector.h @@ -10,9 +10,10 @@ #define LLVM_LIBC_SRC___SUPPORT_FIXEDVECTOR_H #include "src/__support/CPP/array.h" - #include "src/__support/CPP/iterator.h" +#include "src/__support/libc_assert.h" #include "src/__support/macros/config.h" +#include "src/string/memory_utils/inline_memset.h" namespace LIBC_NAMESPACE_DECL { @@ -23,27 +24,32 @@ template <typename T, size_t CAPACITY> class FixedVector { size_t item_count = 0; public: - constexpr FixedVector() = default; + LIBC_INLINE constexpr FixedVector() = default; using iterator = typename cpp::array<T, CAPACITY>::iterator; - constexpr FixedVector(iterator begin, iterator end) : store{}, item_count{} { + LIBC_INLINE constexpr FixedVector(iterator begin, iterator end) + : store{}, item_count{} { + LIBC_ASSERT(begin + CAPACITY >= end); for (; begin != end; ++begin) push_back(*begin); } using const_iterator = typename cpp::array<T, CAPACITY>::const_iterator; - constexpr FixedVector(const_iterator begin, const_iterator end) + LIBC_INLINE constexpr FixedVector(const_iterator begin, const_iterator end) : store{}, item_count{} { + LIBC_ASSERT(begin + CAPACITY >= end); for (; begin != end; ++begin) push_back(*begin); } - constexpr FixedVector(size_t count, const T &value) : store{}, item_count{} { + LIBC_INLINE constexpr FixedVector(size_t count, const T &value) + : store{}, item_count{} { + LIBC_ASSERT(count <= CAPACITY); for (size_t i = 0; i < count; ++i) push_back(value); } - constexpr bool push_back(const T &obj) { + LIBC_INLINE constexpr bool push_back(const T &obj) { if (item_count == CAPACITY) return false; store[item_count] = obj; @@ -51,27 +57,43 @@ public: return true; } - constexpr const T &back() const { return store[item_count - 1]; } + LIBC_INLINE constexpr const T &back() const { + LIBC_ASSERT(!empty()); + return store[item_count - 1]; + } - constexpr T &back() { return store[item_count - 1]; } + LIBC_INLINE constexpr T &back() { + LIBC_ASSERT(!empty()); + return store[item_count - 1]; + } - constexpr bool pop_back() { + LIBC_INLINE constexpr bool pop_back() { if (item_count == 0) return false; + inline_memset(&store[item_count - 1], 0, sizeof(T)); --item_count; return true; } - constexpr T &operator[](size_t idx) { return store[idx]; } + LIBC_INLINE constexpr T &operator[](size_t idx) { + LIBC_ASSERT(idx < item_count); + return store[idx]; + } - constexpr const T &operator[](size_t idx) const { return store[idx]; } + LIBC_INLINE constexpr const T &operator[](size_t idx) const { + LIBC_ASSERT(idx < item_count); + return store[idx]; + } - constexpr bool empty() const { return item_count == 0; } + LIBC_INLINE constexpr bool empty() const { return item_count == 0; } - constexpr size_t size() const { return item_count; } + LIBC_INLINE constexpr size_t size() const { return item_count; } // Empties the store for all practical purposes. - constexpr void reset() { item_count = 0; } + LIBC_INLINE constexpr void reset() { + inline_memset(store.data(), 0, sizeof(T) * item_count); + item_count = 0; + } // This static method does not free up the resources held by |store|, // say by calling `free` or something similar. It just does the equivalent @@ -81,7 +103,9 @@ public: // dynamically allocated storate. So, the `destroy` method like this // matches the `destroy` API of those other data structures so that users // can easily swap one data structure for the other. - static void destroy(FixedVector<T, CAPACITY> *store) { store->reset(); } + LIBC_INLINE static void destroy(FixedVector<T, CAPACITY> *store) { + store->reset(); + } using reverse_iterator = typename cpp::array<T, CAPACITY>::reverse_iterator; LIBC_INLINE constexpr reverse_iterator rbegin() { diff --git a/libc/src/__support/threads/thread.cpp b/libc/src/__support/threads/thread.cpp index dad4f75..6f6b75b 100644 --- a/libc/src/__support/threads/thread.cpp +++ b/libc/src/__support/threads/thread.cpp @@ -117,7 +117,9 @@ public: int add_callback(AtExitCallback *callback, void *obj) { cpp::lock_guard lock(mtx); - return callback_list.push_back({callback, obj}); + if (callback_list.push_back({callback, obj})) + return 0; + return -1; } void call() { diff --git a/libc/src/compiler/generic/__stack_chk_fail.cpp b/libc/src/compiler/generic/__stack_chk_fail.cpp index c76ec14..00e976a 100644 --- a/libc/src/compiler/generic/__stack_chk_fail.cpp +++ b/libc/src/compiler/generic/__stack_chk_fail.cpp @@ -9,9 +9,12 @@ #include "src/compiler/__stack_chk_fail.h" #include "src/__support/OSUtil/io.h" #include "src/stdlib/abort.h" +#include <stdint.h> // For uintptr_t extern "C" { +uintptr_t __stack_chk_guard = static_cast<uintptr_t>(0xa9fff01234); + void __stack_chk_fail(void) { LIBC_NAMESPACE::write_to_stderr("stack smashing detected\n"); LIBC_NAMESPACE::abort(); diff --git a/libc/src/complex/cimagf128.h b/libc/src/complex/cimagf128.h index ab8f9ac..aaf52cf 100644 --- a/libc/src/complex/cimagf128.h +++ b/libc/src/complex/cimagf128.h @@ -6,15 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" -#include "src/__support/macros/properties/types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #ifndef LLVM_LIBC_SRC_COMPLEX_CIMAGF128_H #define LLVM_LIBC_SRC_COMPLEX_CIMAGF128_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" +#include "src/__support/macros/properties/types.h" namespace LIBC_NAMESPACE_DECL { @@ -23,5 +20,3 @@ float128 cimagf128(cfloat128 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CIMAGF128_H - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/cimagf16.h b/libc/src/complex/cimagf16.h index 5c5de2e..81ed4d2 100644 --- a/libc/src/complex/cimagf16.h +++ b/libc/src/complex/cimagf16.h @@ -6,15 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" -#include "src/__support/macros/properties/types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #ifndef LLVM_LIBC_SRC_COMPLEX_CIMAGF16_H #define LLVM_LIBC_SRC_COMPLEX_CIMAGF16_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" +#include "src/__support/macros/properties/types.h" namespace LIBC_NAMESPACE_DECL { @@ -23,5 +20,3 @@ float16 cimagf16(cfloat16 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CIMAGF16_H - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/conjf128.h b/libc/src/complex/conjf128.h index c1ae0b0..cae01d3 100644 --- a/libc/src/complex/conjf128.h +++ b/libc/src/complex/conjf128.h @@ -6,14 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #ifndef LLVM_LIBC_SRC_COMPLEX_CONJF128_H #define LLVM_LIBC_SRC_COMPLEX_CONJF128_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" namespace LIBC_NAMESPACE_DECL { @@ -22,5 +19,3 @@ cfloat128 conjf128(cfloat128 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CONJF128_H - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/conjf16.h b/libc/src/complex/conjf16.h index 685ac8a..dde1221 100644 --- a/libc/src/complex/conjf16.h +++ b/libc/src/complex/conjf16.h @@ -6,14 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #ifndef LLVM_LIBC_SRC_COMPLEX_CONJF16_H #define LLVM_LIBC_SRC_COMPLEX_CONJF16_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" namespace LIBC_NAMESPACE_DECL { @@ -22,5 +19,3 @@ cfloat16 conjf16(cfloat16 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CONJF16_H - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/cprojf128.h b/libc/src/complex/cprojf128.h index 5f7fe99..71c1bbe 100644 --- a/libc/src/complex/cprojf128.h +++ b/libc/src/complex/cprojf128.h @@ -6,14 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #ifndef LLVM_LIBC_SRC_COMPLEX_CPROJF128_H #define LLVM_LIBC_SRC_COMPLEX_CPROJF128_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" namespace LIBC_NAMESPACE_DECL { @@ -22,5 +19,3 @@ cfloat128 cprojf128(cfloat128 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CPROJF128_H - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/cprojf16.h b/libc/src/complex/cprojf16.h index 8cce5f0..f12a46d 100644 --- a/libc/src/complex/cprojf16.h +++ b/libc/src/complex/cprojf16.h @@ -6,14 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #ifndef LLVM_LIBC_SRC_COMPLEX_CPROJF16_H #define LLVM_LIBC_SRC_COMPLEX_CPROJF16_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" namespace LIBC_NAMESPACE_DECL { @@ -22,5 +19,3 @@ cfloat16 cprojf16(cfloat16 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CPROJF16_H - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/crealf128.h b/libc/src/complex/crealf128.h index 4922ae7..b90c3e7 100644 --- a/libc/src/complex/crealf128.h +++ b/libc/src/complex/crealf128.h @@ -6,15 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" -#include "src/__support/macros/properties/types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #ifndef LLVM_LIBC_SRC_COMPLEX_CREALF128_H #define LLVM_LIBC_SRC_COMPLEX_CREALF128_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" +#include "src/__support/macros/properties/types.h" namespace LIBC_NAMESPACE_DECL { @@ -23,5 +20,3 @@ float128 crealf128(cfloat128 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CREALF128_H - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/crealf16.h b/libc/src/complex/crealf16.h index e6098a2..09d6664 100644 --- a/libc/src/complex/crealf16.h +++ b/libc/src/complex/crealf16.h @@ -6,15 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/macros/properties/complex_types.h" -#include "src/__support/macros/properties/types.h" - -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #ifndef LLVM_LIBC_SRC_COMPLEX_CREALF16_H #define LLVM_LIBC_SRC_COMPLEX_CREALF16_H #include "src/__support/macros/config.h" +#include "src/__support/macros/properties/complex_types.h" +#include "src/__support/macros/properties/types.h" namespace LIBC_NAMESPACE_DECL { @@ -23,5 +20,3 @@ float16 crealf16(cfloat16 x); } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_COMPLEX_CREALF16_H - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/generic/cimagf128.cpp b/libc/src/complex/generic/cimagf128.cpp index c21bd7f..78dbb8e 100644 --- a/libc/src/complex/generic/cimagf128.cpp +++ b/libc/src/complex/generic/cimagf128.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/cimagf128.h" -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -21,5 +19,3 @@ LLVM_LIBC_FUNCTION(float128, cimagf128, (cfloat128 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/generic/cimagf16.cpp b/libc/src/complex/generic/cimagf16.cpp index 3616879..25d9b3d 100644 --- a/libc/src/complex/generic/cimagf16.cpp +++ b/libc/src/complex/generic/cimagf16.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/cimagf16.h" -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -21,5 +19,3 @@ LLVM_LIBC_FUNCTION(float16, cimagf16, (cfloat16 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/generic/conjf128.cpp b/libc/src/complex/generic/conjf128.cpp index c65b548..a63809a 100644 --- a/libc/src/complex/generic/conjf128.cpp +++ b/libc/src/complex/generic/conjf128.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/conjf128.h" -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -19,5 +17,3 @@ LLVM_LIBC_FUNCTION(cfloat128, conjf128, (cfloat128 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/generic/conjf16.cpp b/libc/src/complex/generic/conjf16.cpp index dac11e2..cd1ab67 100644 --- a/libc/src/complex/generic/conjf16.cpp +++ b/libc/src/complex/generic/conjf16.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/conjf16.h" -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -19,5 +17,3 @@ LLVM_LIBC_FUNCTION(cfloat16, conjf16, (cfloat16 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/generic/cprojf128.cpp b/libc/src/complex/generic/cprojf128.cpp index 97134b5..eb2cd08 100644 --- a/libc/src/complex/generic/cprojf128.cpp +++ b/libc/src/complex/generic/cprojf128.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/cprojf128.h" -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -19,5 +17,3 @@ LLVM_LIBC_FUNCTION(cfloat128, cprojf128, (cfloat128 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/generic/cprojf16.cpp b/libc/src/complex/generic/cprojf16.cpp index bd0425f..8d2d64a4 100644 --- a/libc/src/complex/generic/cprojf16.cpp +++ b/libc/src/complex/generic/cprojf16.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/cprojf16.h" -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -19,5 +17,3 @@ LLVM_LIBC_FUNCTION(cfloat16, cprojf16, (cfloat16 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/complex/generic/crealf128.cpp b/libc/src/complex/generic/crealf128.cpp index e72a778..e755498 100644 --- a/libc/src/complex/generic/crealf128.cpp +++ b/libc/src/complex/generic/crealf128.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/crealf128.h" -#if defined(LIBC_TYPES_HAS_CFLOAT128) - #include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -21,5 +19,3 @@ LLVM_LIBC_FUNCTION(float128, crealf128, (cfloat128 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT128 diff --git a/libc/src/complex/generic/crealf16.cpp b/libc/src/complex/generic/crealf16.cpp index 3514207..c9e8626 100644 --- a/libc/src/complex/generic/crealf16.cpp +++ b/libc/src/complex/generic/crealf16.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "src/complex/crealf16.h" -#if defined(LIBC_TYPES_HAS_CFLOAT16) - #include "src/__support/CPP/bit.h" #include "src/__support/common.h" #include "src/__support/complex_type.h" @@ -21,5 +19,3 @@ LLVM_LIBC_FUNCTION(float16, crealf16, (cfloat16 x)) { } } // namespace LIBC_NAMESPACE_DECL - -#endif // LIBC_TYPES_HAS_CFLOAT16 diff --git a/libc/src/math/docs/add_math_function.md b/libc/src/math/docs/add_math_function.md index f02d502..daaf1a3 100644 --- a/libc/src/math/docs/add_math_function.md +++ b/libc/src/math/docs/add_math_function.md @@ -18,7 +18,7 @@ together with its specifications: ``` - Add function specs to the file: ``` - libc/hdrgen/yaml/math.yaml + libc/include/math.yaml ``` ## Implementation diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index b3d4612..382f5b3 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -358,7 +358,6 @@ add_header_library( HDRS sincosf16_utils.h DEPENDS - libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.nearest_integer libc.src.__support.common @@ -1702,8 +1701,6 @@ add_header_library( libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization libc.src.__support.common diff --git a/libc/src/math/generic/exp10f_impl.h b/libc/src/math/generic/exp10f_impl.h index d741318..975fd01 100644 --- a/libc/src/math/generic/exp10f_impl.h +++ b/libc/src/math/generic/exp10f_impl.h @@ -10,12 +10,9 @@ #define LLVM_LIBC_SRC_MATH_GENERIC_EXP10F_IMPL_H #include "explogxf.h" -#include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" diff --git a/libc/src/math/generic/range_reduction_double_common.h b/libc/src/math/generic/range_reduction_double_common.h index bcab82f..06aeb49 100644 --- a/libc/src/math/generic/range_reduction_double_common.h +++ b/libc/src/math/generic/range_reduction_double_common.h @@ -9,7 +9,6 @@ #ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H #define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H -#include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/double_double.h" #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/multiply_add.h" diff --git a/libc/src/math/generic/sincosf16_utils.h b/libc/src/math/generic/sincosf16_utils.h index 5e5edd4..87b1dde 100644 --- a/libc/src/math/generic/sincosf16_utils.h +++ b/libc/src/math/generic/sincosf16_utils.h @@ -9,9 +9,7 @@ #ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H #define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H -#include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" diff --git a/libc/src/pthread/pthread_condattr_init.cpp b/libc/src/pthread/pthread_condattr_init.cpp index 12005b8..b360804 100644 --- a/libc/src/pthread/pthread_condattr_init.cpp +++ b/libc/src/pthread/pthread_condattr_init.cpp @@ -11,8 +11,8 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" -#include <pthread.h> // pthread_condattr_t, PTHREAD_PROCESS_PRIVATE -#include <time.h> // CLOCK_REALTIME +#include "hdr/time_macros.h" // CLOCK_REALTIME +#include <pthread.h> // pthread_condattr_t, PTHREAD_PROCESS_PRIVATE namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/pthread/pthread_condattr_setclock.cpp b/libc/src/pthread/pthread_condattr_setclock.cpp index 37fbd6b..5e825d5 100644 --- a/libc/src/pthread/pthread_condattr_setclock.cpp +++ b/libc/src/pthread/pthread_condattr_setclock.cpp @@ -12,9 +12,9 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include <pthread.h> // pthread_condattr_t -#include <sys/types.h> // clockid_t -#include <time.h> // CLOCK_MONOTONIC, CLOCK_REALTIME +#include "hdr/time_macros.h" // CLOCK_MONOTONIC, CLOCK_REALTIME +#include <pthread.h> // pthread_condattr_t +#include <sys/types.h> // clockid_t namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/stdlib/exit_handler.h b/libc/src/stdlib/exit_handler.h index 9720c54..e9d163d 100644 --- a/libc/src/stdlib/exit_handler.h +++ b/libc/src/stdlib/exit_handler.h @@ -48,7 +48,7 @@ LIBC_INLINE void stdc_at_exit_func(void *payload) { LIBC_INLINE void call_exit_callbacks(ExitCallbackList &callbacks) { handler_list_mtx.lock(); while (!callbacks.empty()) { - AtExitUnit &unit = callbacks.back(); + AtExitUnit unit = callbacks.back(); callbacks.pop_back(); handler_list_mtx.unlock(); unit.callback(unit.payload); diff --git a/libc/src/stdlib/heap_sort.h b/libc/src/stdlib/heap_sort.h index ccb9ec5..b969977 100644 --- a/libc/src/stdlib/heap_sort.h +++ b/libc/src/stdlib/heap_sort.h @@ -18,11 +18,12 @@ namespace internal { // A simple in-place heapsort implementation. // Follow the implementation in https://en.wikipedia.org/wiki/Heapsort. -LIBC_INLINE void heap_sort(const Array &array) { - size_t end = array.size(); +template <typename A, typename F> +LIBC_INLINE void heap_sort(const A &array, const F &is_less) { + size_t end = array.len(); size_t start = end / 2; - auto left_child = [](size_t i) -> size_t { return 2 * i + 1; }; + const auto left_child = [](size_t i) -> size_t { return 2 * i + 1; }; while (end > 1) { if (start > 0) { @@ -40,12 +41,11 @@ LIBC_INLINE void heap_sort(const Array &array) { while (left_child(root) < end) { size_t child = left_child(root); // If there are two children, set child to the greater. - if (child + 1 < end && - array.elem_compare(child, array.get(child + 1)) < 0) + if ((child + 1 < end) && is_less(array.get(child), array.get(child + 1))) ++child; // If the root is less than the greater child - if (array.elem_compare(root, array.get(child)) >= 0) + if (!is_less(array.get(root), array.get(child))) break; // Swap the root with the greater child and continue sifting down. diff --git a/libc/src/stdlib/qsort.cpp b/libc/src/stdlib/qsort.cpp index 65a63c2..0bf5fc7 100644 --- a/libc/src/stdlib/qsort.cpp +++ b/libc/src/stdlib/qsort.cpp @@ -18,14 +18,12 @@ namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(void, qsort, (void *array, size_t array_size, size_t elem_size, int (*compare)(const void *, const void *))) { - if (array == nullptr || array_size == 0 || elem_size == 0) - return; - internal::Comparator c(compare); - auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size, - elem_size, c); + const auto is_less = [compare](const void *a, const void *b) -> bool { + return compare(a, b) < 0; + }; - internal::sort(arr); + internal::unstable_sort(array, array_size, elem_size, is_less); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_data.h b/libc/src/stdlib/qsort_data.h index c529d55..aa6d9bb 100644 --- a/libc/src/stdlib/qsort_data.h +++ b/libc/src/stdlib/qsort_data.h @@ -17,91 +17,122 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -using Compare = int(const void *, const void *); -using CompareWithState = int(const void *, const void *, void *); - -enum class CompType { COMPARE, COMPARE_WITH_STATE }; - -struct Comparator { - union { - Compare *comp_func; - CompareWithState *comp_func_r; - }; - const CompType comp_type; - - void *arg; - - Comparator(Compare *func) - : comp_func(func), comp_type(CompType::COMPARE), arg(nullptr) {} - - Comparator(CompareWithState *func, void *arg_val) - : comp_func_r(func), comp_type(CompType::COMPARE_WITH_STATE), - arg(arg_val) {} - -#if defined(__clang__) - // Recent upstream changes to -fsanitize=function find more instances of - // function type mismatches. One case is with the comparator passed to this - // class. Libraries will tend to pass comparators that take pointers to - // varying types while this comparator expects to accept const void pointers. - // Ideally those tools would pass a function that strictly accepts const - // void*s to avoid UB, or would use qsort_r to pass their own comparator. - [[clang::no_sanitize("function")]] -#endif - int comp_vals(const void *a, const void *b) const { - if (comp_type == CompType::COMPARE) { - return comp_func(a, b); - } else { - return comp_func_r(a, b, arg); +class ArrayGenericSize { + cpp::byte *array_base; + size_t array_len; + size_t elem_size; + + LIBC_INLINE cpp::byte *get_internal(size_t i) const { + return array_base + (i * elem_size); + } + +public: + LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e) + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s), + elem_size(e) {} + + static constexpr bool has_fixed_size() { return false; } + + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } + + LIBC_INLINE void swap(size_t i, size_t j) const { + // It's possible to use 8 byte blocks with `uint64_t`, but that + // generates more machine code as the remainder loop gets + // unrolled, plus 4 byte operations are more likely to be + // efficient on a wider variety of hardware. On x86 LLVM tends + // to unroll the block loop again into 2 16 byte swaps per + // iteration which is another reason that 4 byte blocks yields + // good performance even for big types. + using block_t = uint32_t; + constexpr size_t BLOCK_SIZE = sizeof(block_t); + + alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE]; + + cpp::byte *elem_i = get_internal(i); + cpp::byte *elem_j = get_internal(j); + + const size_t elem_size_rem = elem_size % BLOCK_SIZE; + const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem); + + while (elem_i != elem_i_block_end) { + __builtin_memcpy(tmp_block, elem_i, BLOCK_SIZE); + __builtin_memcpy(elem_i, elem_j, BLOCK_SIZE); + __builtin_memcpy(elem_j, tmp_block, BLOCK_SIZE); + + elem_i += BLOCK_SIZE; + elem_j += BLOCK_SIZE; + } + + for (size_t n = 0; n < elem_size_rem; ++n) { + cpp::byte tmp = elem_i[n]; + elem_i[n] = elem_j[n]; + elem_j[n] = tmp; } } + + LIBC_INLINE size_t len() const { return array_len; } + + // Make an Array starting at index |i| and length |s|. + LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const { + return ArrayGenericSize(get_internal(i), s, elem_size); + } + + // Reset this Array to point at a different interval of the same + // items starting at index |i|. + LIBC_INLINE void reset_bounds(size_t i, size_t s) { + array_base = get_internal(i); + array_len = s; + } }; -class Array { - uint8_t *array; - size_t array_size; - size_t elem_size; - Comparator compare; +// Having a specialized Array type for sorting that knows at +// compile-time what the size of the element is, allows for much more +// efficient swapping and for cheaper offset calculations. +template <size_t ELEM_SIZE> class ArrayFixedSize { + cpp::byte *array_base; + size_t array_len; -public: - Array(uint8_t *a, size_t s, size_t e, Comparator c) - : array(a), array_size(s), elem_size(e), compare(c) {} - - uint8_t *get(size_t i) const { return array + i * elem_size; } - - void swap(size_t i, size_t j) const { - uint8_t *elem_i = get(i); - uint8_t *elem_j = get(j); - for (size_t b = 0; b < elem_size; ++b) { - uint8_t temp = elem_i[b]; - elem_i[b] = elem_j[b]; - elem_j[b] = temp; - } + LIBC_INLINE cpp::byte *get_internal(size_t i) const { + return array_base + (i * ELEM_SIZE); } - int elem_compare(size_t i, const uint8_t *other) const { - // An element must compare equal to itself so we don't need to consult the - // user provided comparator. - if (get(i) == other) - return 0; - return compare.comp_vals(get(i), other); +public: + LIBC_INLINE ArrayFixedSize(void *a, size_t s) + : array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {} + + // Beware this function is used a heuristic for cheap to swap types, so + // instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad + // idea perf wise. + static constexpr bool has_fixed_size() { return true; } + + LIBC_INLINE void *get(size_t i) const { return get_internal(i); } + + LIBC_INLINE void swap(size_t i, size_t j) const { + alignas(32) cpp::byte tmp[ELEM_SIZE]; + + cpp::byte *elem_i = get_internal(i); + cpp::byte *elem_j = get_internal(j); + + __builtin_memcpy(tmp, elem_i, ELEM_SIZE); + __builtin_memmove(elem_i, elem_j, ELEM_SIZE); + __builtin_memcpy(elem_j, tmp, ELEM_SIZE); } - size_t size() const { return array_size; } + LIBC_INLINE size_t len() const { return array_len; } - // Make an Array starting at index |i| and size |s|. - LIBC_INLINE Array make_array(size_t i, size_t s) const { - return Array(get(i), s, elem_size, compare); + // Make an Array starting at index |i| and length |s|. + LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const { + return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s); } - // Reset this Array to point at a different interval of the same items. - LIBC_INLINE void reset_bounds(uint8_t *a, size_t s) { - array = a; - array_size = s; + // Reset this Array to point at a different interval of the same + // items starting at index |i|. + LIBC_INLINE void reset_bounds(size_t i, size_t s) { + array_base = get_internal(i); + array_len = s; } }; -using SortingRoutine = void(const Array &); - } // namespace internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_pivot.h b/libc/src/stdlib/qsort_pivot.h new file mode 100644 index 0000000..b27e746 --- /dev/null +++ b/libc/src/stdlib/qsort_pivot.h @@ -0,0 +1,85 @@ +//===-- Implementation header for qsort utilities ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H +#define LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H + +#include <stddef.h> // For size_t + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +// Recursively select a pseudomedian if above this threshold. +constexpr size_t PSEUDO_MEDIAN_REC_THRESHOLD = 64; + +// Selects a pivot from `array`. Algorithm taken from glidesort by Orson Peters. +// +// This chooses a pivot by sampling an adaptive amount of points, approximating +// the quality of a median of sqrt(n) elements. +template <typename A, typename F> +size_t choose_pivot(const A &array, const F &is_less) { + const size_t len = array.len(); + + if (len < 8) { + return 0; + } + + const size_t len_div_8 = len / 8; + + const size_t a = 0; // [0, floor(n/8)) + const size_t b = len_div_8 * 4; // [4*floor(n/8), 5*floor(n/8)) + const size_t c = len_div_8 * 7; // [7*floor(n/8), 8*floor(n/8)) + + if (len < PSEUDO_MEDIAN_REC_THRESHOLD) + return median3(array, a, b, c, is_less); + else + return median3_rec(array, a, b, c, len_div_8, is_less); +} + +// Calculates an approximate median of 3 elements from sections a, b, c, or +// recursively from an approximation of each, if they're large enough. By +// dividing the size of each section by 8 when recursing we have logarithmic +// recursion depth and overall sample from f(n) = 3*f(n/8) -> f(n) = +// O(n^(log(3)/log(8))) ~= O(n^0.528) elements. +template <typename A, typename F> +size_t median3_rec(const A &array, size_t a, size_t b, size_t c, size_t n, + const F &is_less) { + if (n * 8 >= PSEUDO_MEDIAN_REC_THRESHOLD) { + const size_t n8 = n / 8; + a = median3_rec(array, a, a + (n8 * 4), a + (n8 * 7), n8, is_less); + b = median3_rec(array, b, b + (n8 * 4), b + (n8 * 7), n8, is_less); + c = median3_rec(array, c, c + (n8 * 4), c + (n8 * 7), n8, is_less); + } + return median3(array, a, b, c, is_less); +} + +/// Calculates the median of 3 elements. +template <typename A, typename F> +size_t median3(const A &array, size_t a, size_t b, size_t c, const F &is_less) { + const void *a_ptr = array.get(a); + const void *b_ptr = array.get(b); + const void *c_ptr = array.get(c); + + const bool x = is_less(a_ptr, b_ptr); + const bool y = is_less(a_ptr, c_ptr); + if (x == y) { + // If x=y=0 then b, c <= a. In this case we want to return max(b, c). + // If x=y=1 then a < b, c. In this case we want to return min(b, c). + // By toggling the outcome of b < c using XOR x we get this behavior. + const bool z = is_less(b_ptr, c_ptr); + return z ^ x ? c : b; + } else { + // Either c <= a < b or b <= a < c, thus a is our median. + return a; + } +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_STDLIB_QSORT_PIVOT_H diff --git a/libc/src/stdlib/qsort_r.cpp b/libc/src/stdlib/qsort_r.cpp index bf61a40..4e60998 100644 --- a/libc/src/stdlib/qsort_r.cpp +++ b/libc/src/stdlib/qsort_r.cpp @@ -19,13 +19,12 @@ LLVM_LIBC_FUNCTION(void, qsort_r, (void *array, size_t array_size, size_t elem_size, int (*compare)(const void *, const void *, void *), void *arg)) { - if (array == nullptr || array_size == 0 || elem_size == 0) - return; - internal::Comparator c(compare, arg); - auto arr = internal::Array(reinterpret_cast<uint8_t *>(array), array_size, - elem_size, c); - internal::sort(arr); + const auto is_less = [compare, arg](const void *a, const void *b) -> bool { + return compare(a, b, arg) < 0; + }; + + internal::unstable_sort(array, array_size, elem_size, is_less); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_util.h b/libc/src/stdlib/qsort_util.h index d42adde..7882b82 100644 --- a/libc/src/stdlib/qsort_util.h +++ b/libc/src/stdlib/qsort_util.h @@ -27,11 +27,48 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -#if LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT -constexpr auto sort = quick_sort; -#elif LIBC_QSORT_IMPL == LIBC_QSORT_HEAP_SORT -constexpr auto sort = heap_sort; -#endif +template <bool USE_QUICKSORT, typename F> +LIBC_INLINE void unstable_sort_impl(void *array, size_t array_len, + size_t elem_size, const F &is_less) { + if (array == nullptr || array_len == 0 || elem_size == 0) + return; + + if constexpr (USE_QUICKSORT) { + switch (elem_size) { + case 4: { + auto arr_fixed_size = internal::ArrayFixedSize<4>(array, array_len); + quick_sort(arr_fixed_size, is_less); + return; + } + case 8: { + auto arr_fixed_size = internal::ArrayFixedSize<8>(array, array_len); + quick_sort(arr_fixed_size, is_less); + return; + } + case 16: { + auto arr_fixed_size = internal::ArrayFixedSize<16>(array, array_len); + quick_sort(arr_fixed_size, is_less); + return; + } + default: + auto arr_generic_size = + internal::ArrayGenericSize(array, array_len, elem_size); + quick_sort(arr_generic_size, is_less); + return; + } + } else { + auto arr_generic_size = + internal::ArrayGenericSize(array, array_len, elem_size); + heap_sort(arr_generic_size, is_less); + } +} + +template <typename F> +LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size, + const F &is_less) { +#define USE_QUICK_SORT ((LIBC_QSORT_IMPL) == (LIBC_QSORT_QUICK_SORT)) + unstable_sort_impl<USE_QUICK_SORT, F>(array, array_len, elem_size, is_less); +} } // namespace internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/quick_sort.h b/libc/src/stdlib/quick_sort.h index 82b90a7..9ab2830 100644 --- a/libc/src/stdlib/quick_sort.h +++ b/libc/src/stdlib/quick_sort.h @@ -9,84 +9,175 @@ #ifndef LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H #define LLVM_LIBC_SRC_STDLIB_QUICK_SORT_H -#include "src/__support/macros/attributes.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/CPP/cstddef.h" #include "src/__support/macros/config.h" -#include "src/stdlib/qsort_data.h" +#include "src/stdlib/qsort_pivot.h" #include <stdint.h> namespace LIBC_NAMESPACE_DECL { namespace internal { -// A simple quicksort implementation using the Hoare partition scheme. -LIBC_INLINE size_t partition(const Array &array) { - const size_t array_size = array.size(); - size_t pivot_index = array_size / 2; - uint8_t *pivot = array.get(pivot_index); - size_t i = 0; - size_t j = array_size - 1; +// Branchless Lomuto partition based on the implementation by Lukas +// Bergdoll and Orson Peters +// https://github.com/Voultapher/sort-research-rs/blob/main/writeup/lomcyc_partition/text.md. +// Simplified to avoid having to stack allocate. +template <typename A, typename F> +LIBC_INLINE size_t partition_lomuto_branchless(const A &array, + const void *pivot, + const F &is_less) { + const size_t array_len = array.len(); + + size_t left = 0; + size_t right = 0; + + while (right < array_len) { + const bool right_is_lt = is_less(array.get(right), pivot); + array.swap(left, right); + left += static_cast<size_t>(right_is_lt); + right += 1; + } + + return left; +} + +// Optimized for large types that are expensive to move. Not optimized +// for integers. It's possible to use a cyclic permutation here for +// large types as done in ipnsort but the advantages of this are limited +// as `is_less` is a small wrapper around a call to a function pointer +// and won't incur much binary-size overhead. The other reason to use +// cyclic permutation is to have more efficient swapping, but we don't +// know the element size so this isn't applicable here either. +template <typename A, typename F> +LIBC_INLINE size_t partition_hoare_branchy(const A &array, const void *pivot, + const F &is_less) { + const size_t array_len = array.len(); + + size_t left = 0; + size_t right = array_len; while (true) { - int compare_i, compare_j; - - while ((compare_i = array.elem_compare(i, pivot)) < 0) - ++i; - while ((compare_j = array.elem_compare(j, pivot)) > 0) - --j; - - // At some point i will crossover j so we will definitely break out of - // this while loop. - if (i >= j) - return j + 1; - - array.swap(i, j); - - // The pivot itself might have got swapped so we will update the pivot. - if (i == pivot_index) { - pivot = array.get(j); - pivot_index = j; - } else if (j == pivot_index) { - pivot = array.get(i); - pivot_index = i; + while (left < right && is_less(array.get(left), pivot)) + ++left; + + while (true) { + --right; + if (left >= right || is_less(array.get(right), pivot)) { + break; + } } - if (compare_i == 0 && compare_j == 0) { - // If we do not move the pointers, we will end up with an - // infinite loop as i and j will be stuck without advancing. - ++i; - --j; - } + if (left >= right) + break; + + array.swap(left, right); + ++left; + } + + return left; +} + +template <typename A, typename F> +LIBC_INLINE size_t partition(const A &array, size_t pivot_index, + const F &is_less) { + // Place the pivot at the beginning of the array. + if (pivot_index != 0) { + array.swap(0, pivot_index); } + + const A array_without_pivot = array.make_array(1, array.len() - 1); + const void *pivot = array.get(0); + + size_t num_lt; + if constexpr (A::has_fixed_size()) { + // Branchless Lomuto avoid branch misprediction penalties, but + // it also swaps more often which is only faster if the swap is a fast + // constant operation. + num_lt = partition_lomuto_branchless(array_without_pivot, pivot, is_less); + } else { + num_lt = partition_hoare_branchy(array_without_pivot, pivot, is_less); + } + + // Place the pivot between the two partitions. + array.swap(0, num_lt); + + return num_lt; } -LIBC_INLINE void quick_sort(Array array) { +template <typename A, typename F> +LIBC_INLINE void quick_sort_impl(A &array, const void *ancestor_pivot, + size_t limit, const F &is_less) { while (true) { - const size_t array_size = array.size(); - if (array_size <= 1) + const size_t array_len = array.len(); + if (array_len <= 1) return; - size_t split_index = partition(array); - if (array_size == 2) - // The partition operation sorts the two element array. + + // If too many bad pivot choices were made, simply fall back to + // heapsort in order to guarantee `O(N x log(N))` worst-case. + if (limit == 0) { + heap_sort(array, is_less); return; + } - // Make Arrays describing the two sublists that still need sorting. - Array left = array.make_array(0, split_index); - Array right = array.make_array(split_index, array.size() - split_index); - - // Recurse to sort the smaller of the two, and then loop round within this - // function to sort the larger. This way, recursive call depth is bounded - // by log2 of the total array size, because every recursive call is sorting - // a list at most half the length of the one in its caller. - if (left.size() < right.size()) { - quick_sort(left); - array.reset_bounds(right.get(0), right.size()); - } else { - quick_sort(right); - array.reset_bounds(left.get(0), left.size()); + limit -= 1; + + const size_t pivot_index = choose_pivot(array, is_less); + + // If the chosen pivot is equal to the predecessor, then it's the smallest + // element in the slice. Partition the slice into elements equal to and + // elements greater than the pivot. This case is usually hit when the slice + // contains many duplicate elements. + if (ancestor_pivot) { + if (!is_less(ancestor_pivot, array.get(pivot_index))) { + const size_t num_lt = + partition(array, pivot_index, + [is_less](const void *a, const void *b) -> bool { + return !is_less(b, a); + }); + + // Continue sorting elements greater than the pivot. We know that + // `num_lt` cont + array.reset_bounds(num_lt + 1, array.len() - (num_lt + 1)); + ancestor_pivot = nullptr; + continue; + } } + + size_t split_index = partition(array, pivot_index, is_less); + + if (array_len == 2) + // The partition operation sorts the two element array. + return; + + // Split the array into `left`, `pivot`, and `right`. + A left = array.make_array(0, split_index); + const void *pivot = array.get(split_index); + const size_t right_start = split_index + 1; + A right = array.make_array(right_start, array.len() - right_start); + + // Recurse into the left side. We have a fixed recursion limit, + // testing shows no real benefit for recursing into the shorter + // side. + quick_sort_impl(left, ancestor_pivot, limit, is_less); + + // Continue with the right side. + array = right; + ancestor_pivot = pivot; } } +constexpr size_t ilog2(size_t n) { return cpp::bit_width(n) - 1; } + +template <typename A, typename F> +LIBC_INLINE void quick_sort(A &array, const F &is_less) { + const void *ancestor_pivot = nullptr; + // Limit the number of imbalanced partitions to `2 * floor(log2(len))`. + // The binary OR by one is used to eliminate the zero-check in the logarithm. + const size_t limit = 2 * ilog2((array.len() | 1)); + quick_sort_impl(array, ancestor_pivot, limit, is_less); +} + } // namespace internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/CMakeLists.txt b/libc/src/time/CMakeLists.txt index ae835dc..ef9bfe5 100644 --- a/libc/src/time/CMakeLists.txt +++ b/libc/src/time/CMakeLists.txt @@ -2,6 +2,17 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) endif() +add_header_library( + time_constants + HDRS + time_constants.h + DEPENDS + libc.include.time + libc.src.__support.CPP.array + libc.src.__support.CPP.string_view + libc.hdr.types.time_t +) + add_object_library( time_utils SRCS @@ -12,6 +23,10 @@ add_object_library( libc.include.time libc.src.__support.CPP.limits libc.src.errno.errno + .time_constants + libc.hdr.types.time_t + libc.hdr.types.size_t + libc.hdr.types.struct_tm ) add_entrypoint_object( @@ -22,7 +37,9 @@ add_entrypoint_object( asctime.h DEPENDS .time_utils + .time_constants libc.include.time + libc.hdr.types.struct_tm ) add_entrypoint_object( @@ -33,7 +50,9 @@ add_entrypoint_object( asctime_r.h DEPENDS .time_utils + .time_constants libc.include.time + libc.hdr.types.struct_tm ) add_entrypoint_object( @@ -44,6 +63,7 @@ add_entrypoint_object( ctime.h DEPENDS .time_utils + .time_constants libc.hdr.types.time_t libc.include.time ) @@ -56,6 +76,7 @@ add_entrypoint_object( ctime_r.h DEPENDS .time_utils + .time_constants libc.hdr.types.time_t libc.include.time ) @@ -68,6 +89,7 @@ add_entrypoint_object( difftime.h DEPENDS libc.include.time + libc.hdr.types.time_t ) add_entrypoint_object( @@ -79,6 +101,8 @@ add_entrypoint_object( DEPENDS .time_utils libc.include.time + libc.hdr.types.time_t + libc.hdr.types.struct_tm ) add_entrypoint_object( @@ -90,6 +114,8 @@ add_entrypoint_object( DEPENDS .time_utils libc.include.time + libc.hdr.types.time_t + libc.hdr.types.struct_tm ) add_entrypoint_object( @@ -100,8 +126,11 @@ add_entrypoint_object( mktime.h DEPENDS .time_utils + .time_constants libc.include.time libc.src.errno.errno + libc.hdr.types.time_t + libc.hdr.types.struct_tm ) add_entrypoint_object( @@ -115,6 +144,7 @@ add_entrypoint_object( libc.hdr.types.time_t libc.src.__support.time.clock_gettime libc.src.errno.errno + libc.hdr.types.struct_tm ) add_entrypoint_object( diff --git a/libc/src/time/asctime.cpp b/libc/src/time/asctime.cpp index d6fbe73..2b00c41 100644 --- a/libc/src/time/asctime.cpp +++ b/libc/src/time/asctime.cpp @@ -9,15 +9,15 @@ #include "src/time/asctime.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/time/time_constants.h" #include "src/time/time_utils.h" namespace LIBC_NAMESPACE_DECL { -using LIBC_NAMESPACE::time_utils::TimeConstants; - LLVM_LIBC_FUNCTION(char *, asctime, (const struct tm *timeptr)) { - static char buffer[TimeConstants::ASCTIME_BUFFER_SIZE]; - return time_utils::asctime(timeptr, buffer, TimeConstants::ASCTIME_MAX_BYTES); + static char buffer[time_constants::ASCTIME_BUFFER_SIZE]; + return time_utils::asctime(timeptr, buffer, + time_constants::ASCTIME_MAX_BYTES); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/asctime.h b/libc/src/time/asctime.h index 623e6df..37325e7 100644 --- a/libc/src/time/asctime.h +++ b/libc/src/time/asctime.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_TIME_ASCTIME_H #define LLVM_LIBC_SRC_TIME_ASCTIME_H +#include "hdr/types/struct_tm.h" #include "src/__support/macros/config.h" -#include <time.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/time/asctime_r.cpp b/libc/src/time/asctime_r.cpp index caa22f1..bf53bfd 100644 --- a/libc/src/time/asctime_r.cpp +++ b/libc/src/time/asctime_r.cpp @@ -9,15 +9,15 @@ #include "src/time/asctime_r.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/time/time_constants.h" #include "src/time/time_utils.h" namespace LIBC_NAMESPACE_DECL { -using LIBC_NAMESPACE::time_utils::TimeConstants; - LLVM_LIBC_FUNCTION(char *, asctime_r, (const struct tm *timeptr, char *buffer)) { - return time_utils::asctime(timeptr, buffer, TimeConstants::ASCTIME_MAX_BYTES); + return time_utils::asctime(timeptr, buffer, + time_constants::ASCTIME_MAX_BYTES); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/asctime_r.h b/libc/src/time/asctime_r.h index 328b7df..65a6b84 100644 --- a/libc/src/time/asctime_r.h +++ b/libc/src/time/asctime_r.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_TIME_ASCTIME_R_H #define LLVM_LIBC_SRC_TIME_ASCTIME_R_H +#include "hdr/types/struct_tm.h" #include "src/__support/macros/config.h" -#include <time.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/time/ctime.cpp b/libc/src/time/ctime.cpp index 8adae9b..ac0ffe5 100644 --- a/libc/src/time/ctime.cpp +++ b/libc/src/time/ctime.cpp @@ -6,23 +6,22 @@ // //===----------------------------------------------------------------------===// -#include "ctime.h" +#include "src/time/ctime.h" #include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" -#include "time_utils.h" +#include "src/time/time_constants.h" +#include "src/time/time_utils.h" namespace LIBC_NAMESPACE_DECL { -using LIBC_NAMESPACE::time_utils::TimeConstants; - LLVM_LIBC_FUNCTION(char *, ctime, (const time_t *t_ptr)) { if (t_ptr == nullptr || *t_ptr > cpp::numeric_limits<int32_t>::max()) { return nullptr; } - static char buffer[TimeConstants::ASCTIME_BUFFER_SIZE]; + static char buffer[time_constants::ASCTIME_BUFFER_SIZE]; return time_utils::asctime(time_utils::localtime(t_ptr), buffer, - TimeConstants::ASCTIME_MAX_BYTES); + time_constants::ASCTIME_MAX_BYTES); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/ctime_r.cpp b/libc/src/time/ctime_r.cpp index 63d93c4..7224f77 100644 --- a/libc/src/time/ctime_r.cpp +++ b/libc/src/time/ctime_r.cpp @@ -6,16 +6,15 @@ // //===----------------------------------------------------------------------===// -#include "ctime_r.h" +#include "src/time/ctime_r.h" #include "src/__support/CPP/limits.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" -#include "time_utils.h" +#include "src/time/time_constants.h" +#include "src/time/time_utils.h" namespace LIBC_NAMESPACE_DECL { -using LIBC_NAMESPACE::time_utils::TimeConstants; - LLVM_LIBC_FUNCTION(char *, ctime_r, (const time_t *t_ptr, char *buffer)) { if (t_ptr == nullptr || buffer == nullptr || *t_ptr > cpp::numeric_limits<int32_t>::max()) { @@ -23,7 +22,7 @@ LLVM_LIBC_FUNCTION(char *, ctime_r, (const time_t *t_ptr, char *buffer)) { } return time_utils::asctime(time_utils::localtime(t_ptr), buffer, - TimeConstants::ASCTIME_MAX_BYTES); + time_constants::ASCTIME_MAX_BYTES); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/difftime.h b/libc/src/time/difftime.h index d5cd593..12de567 100644 --- a/libc/src/time/difftime.h +++ b/libc/src/time/difftime.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_TIME_DIFFTIME_H #define LLVM_LIBC_SRC_TIME_DIFFTIME_H +#include "hdr/types/time_t.h" #include "src/__support/macros/config.h" -#include <time.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/time/gmtime.h b/libc/src/time/gmtime.h index 3de3ceb..ac7f1be 100644 --- a/libc/src/time/gmtime.h +++ b/libc/src/time/gmtime.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_TIME_GMTIME_H #define LLVM_LIBC_SRC_TIME_GMTIME_H +#include "hdr/types/struct_tm.h" +#include "hdr/types/time_t.h" #include "src/__support/macros/config.h" -#include <time.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/time/gmtime_r.h b/libc/src/time/gmtime_r.h index b4f387e..4c88b22 100644 --- a/libc/src/time/gmtime_r.h +++ b/libc/src/time/gmtime_r.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_TIME_GMTIME_R_H #define LLVM_LIBC_SRC_TIME_GMTIME_R_H +#include "hdr/types/struct_tm.h" +#include "hdr/types/time_t.h" #include "src/__support/macros/config.h" -#include <time.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/time/gpu/clock.cpp b/libc/src/time/gpu/clock.cpp index add5b27..8609c5c 100644 --- a/libc/src/time/gpu/clock.cpp +++ b/libc/src/time/gpu/clock.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "src/time/clock.h" + +#include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/time/gpu/time_utils.h" diff --git a/libc/src/time/gpu/nanosleep.cpp b/libc/src/time/gpu/nanosleep.cpp index a92f660..d22d9d6 100644 --- a/libc/src/time/gpu/nanosleep.cpp +++ b/libc/src/time/gpu/nanosleep.cpp @@ -8,6 +8,7 @@ #include "src/time/nanosleep.h" +#include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/time/gpu/time_utils.h" diff --git a/libc/src/time/mktime.cpp b/libc/src/time/mktime.cpp index 72cd2291..3874cad 100644 --- a/libc/src/time/mktime.cpp +++ b/libc/src/time/mktime.cpp @@ -9,15 +9,11 @@ #include "src/time/mktime.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/time/time_constants.h" #include "src/time/time_utils.h" namespace LIBC_NAMESPACE_DECL { -using LIBC_NAMESPACE::time_utils::TimeConstants; - -static constexpr int NON_LEAP_YEAR_DAYS_IN_MONTH[] = {31, 28, 31, 30, 31, 30, - 31, 31, 30, 31, 30, 31}; - // Returns number of years from (1, year). static constexpr int64_t get_num_of_leap_years_before(int64_t year) { return (year / 4) - (year / 100) + (year / 400); @@ -31,12 +27,12 @@ static constexpr bool is_leap_year(const int64_t year) { LLVM_LIBC_FUNCTION(time_t, mktime, (struct tm * tm_out)) { // Unlike most C Library functions, mktime doesn't just die on bad input. // TODO(rtenneti); Handle leap seconds. - int64_t tm_year_from_base = tm_out->tm_year + TimeConstants::TIME_YEAR_BASE; + int64_t tm_year_from_base = tm_out->tm_year + time_constants::TIME_YEAR_BASE; // 32-bit end-of-the-world is 03:14:07 UTC on 19 January 2038. if (sizeof(time_t) == 4 && - tm_year_from_base >= TimeConstants::END_OF32_BIT_EPOCH_YEAR) { - if (tm_year_from_base > TimeConstants::END_OF32_BIT_EPOCH_YEAR) + tm_year_from_base >= time_constants::END_OF32_BIT_EPOCH_YEAR) { + if (tm_year_from_base > time_constants::END_OF32_BIT_EPOCH_YEAR) return time_utils::out_of_range(); if (tm_out->tm_mon > 0) return time_utils::out_of_range(); @@ -64,7 +60,7 @@ LLVM_LIBC_FUNCTION(time_t, mktime, (struct tm * tm_out)) { // Calculate number of months and years from tm_mon. int64_t month = tm_out->tm_mon; - if (month < 0 || month >= TimeConstants::MONTHS_PER_YEAR - 1) { + if (month < 0 || month >= time_constants::MONTHS_PER_YEAR - 1) { int64_t years = month / 12; month %= 12; if (month < 0) { @@ -78,23 +74,23 @@ LLVM_LIBC_FUNCTION(time_t, mktime, (struct tm * tm_out)) { // Calculate total number of days based on the month and the day (tm_mday). int64_t total_days = tm_out->tm_mday - 1; for (int64_t i = 0; i < month; ++i) - total_days += NON_LEAP_YEAR_DAYS_IN_MONTH[i]; + total_days += time_constants::NON_LEAP_YEAR_DAYS_IN_MONTH[i]; // Add one day if it is a leap year and the month is after February. if (tm_year_is_leap && month > 1) total_days++; // Calculate total numbers of days based on the year. - total_days += (tm_year_from_base - TimeConstants::EPOCH_YEAR) * - TimeConstants::DAYS_PER_NON_LEAP_YEAR; - if (tm_year_from_base >= TimeConstants::EPOCH_YEAR) { + total_days += (tm_year_from_base - time_constants::EPOCH_YEAR) * + time_constants::DAYS_PER_NON_LEAP_YEAR; + if (tm_year_from_base >= time_constants::EPOCH_YEAR) { total_days += get_num_of_leap_years_before(tm_year_from_base - 1) - - get_num_of_leap_years_before(TimeConstants::EPOCH_YEAR); + get_num_of_leap_years_before(time_constants::EPOCH_YEAR); } else if (tm_year_from_base >= 1) { - total_days -= get_num_of_leap_years_before(TimeConstants::EPOCH_YEAR) - + total_days -= get_num_of_leap_years_before(time_constants::EPOCH_YEAR) - get_num_of_leap_years_before(tm_year_from_base - 1); } else { // Calculate number of leap years until 0th year. - total_days -= get_num_of_leap_years_before(TimeConstants::EPOCH_YEAR) - + total_days -= get_num_of_leap_years_before(time_constants::EPOCH_YEAR) - get_num_of_leap_years_before(0); if (tm_year_from_base <= 0) { total_days -= 1; // Subtract 1 for 0th year. @@ -106,11 +102,12 @@ LLVM_LIBC_FUNCTION(time_t, mktime, (struct tm * tm_out)) { } } - // TODO(rtenneti): Need to handle timezone and update of tm_isdst. + // TODO: https://github.com/llvm/llvm-project/issues/121962 + // Need to handle timezone and update of tm_isdst. int64_t seconds = tm_out->tm_sec + - tm_out->tm_min * TimeConstants::SECONDS_PER_MIN + - tm_out->tm_hour * TimeConstants::SECONDS_PER_HOUR + - total_days * TimeConstants::SECONDS_PER_DAY; + tm_out->tm_min * time_constants::SECONDS_PER_MIN + + tm_out->tm_hour * time_constants::SECONDS_PER_HOUR + + total_days * time_constants::SECONDS_PER_DAY; // Update the tm structure's year, month, day, etc. from seconds. if (time_utils::update_from_seconds(seconds, tm_out) < 0) diff --git a/libc/src/time/mktime.h b/libc/src/time/mktime.h index 2b4c679..985c629 100644 --- a/libc/src/time/mktime.h +++ b/libc/src/time/mktime.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_TIME_MKTIME_H #define LLVM_LIBC_SRC_TIME_MKTIME_H +#include "hdr/types/struct_tm.h" +#include "hdr/types/time_t.h" #include "src/__support/macros/config.h" -#include <time.h> namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/time/time.cpp b/libc/src/time/time.cpp index 4a0b614..860909a 100644 --- a/libc/src/time/time.cpp +++ b/libc/src/time/time.cpp @@ -6,12 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "src/time/time_func.h" + #include "hdr/time_macros.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/__support/time/clock_gettime.h" #include "src/errno/libc_errno.h" -#include "src/time/time_func.h" namespace LIBC_NAMESPACE_DECL { // avoid inconsitent clang-format behavior diff --git a/libc/src/time/time_constants.h b/libc/src/time/time_constants.h new file mode 100644 index 0000000..3e25f74 --- /dev/null +++ b/libc/src/time/time_constants.h @@ -0,0 +1,100 @@ +//===-- Collection of constants for time functions --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_TIME_TIME_CONSTANTS_H +#define LLVM_LIBC_SRC_TIME_TIME_CONSTANTS_H + +#include "hdr/types/time_t.h" +#include "src/__support/CPP/array.h" +#include "src/__support/CPP/string_view.h" +#include <stdint.h> + +namespace LIBC_NAMESPACE_DECL { +namespace time_constants { + +enum Month : int { + JANUARY, + FEBRUARY, + MARCH, + APRIL, + MAY, + JUNE, + JULY, + AUGUST, + SEPTEMBER, + OCTOBER, + NOVEMBER, + DECEMBER +}; + +constexpr int SECONDS_PER_MIN = 60; +constexpr int MINUTES_PER_HOUR = 60; +constexpr int HOURS_PER_DAY = 24; +constexpr int DAYS_PER_WEEK = 7; +constexpr int MONTHS_PER_YEAR = 12; +constexpr int DAYS_PER_NON_LEAP_YEAR = 365; +constexpr int DAYS_PER_LEAP_YEAR = 366; + +constexpr int SECONDS_PER_HOUR = SECONDS_PER_MIN * MINUTES_PER_HOUR; +constexpr int SECONDS_PER_DAY = SECONDS_PER_HOUR * HOURS_PER_DAY; +constexpr int NUMBER_OF_SECONDS_IN_LEAP_YEAR = + DAYS_PER_LEAP_YEAR * SECONDS_PER_DAY; + +constexpr int TIME_YEAR_BASE = 1900; +constexpr int EPOCH_YEAR = 1970; +constexpr int EPOCH_WEEK_DAY = 4; + +// For asctime the behavior is undefined if struct tm's tm_wday or tm_mon are +// not within the normal ranges as defined in <time.h>, or if struct tm's +// tm_year exceeds {INT_MAX}-1990, or if the below asctime_internal algorithm +// would attempt to generate more than 26 bytes of output (including the +// terminating null). +constexpr int ASCTIME_BUFFER_SIZE = 256; +constexpr int ASCTIME_MAX_BYTES = 26; + +/* 2000-03-01 (mod 400 year, immediately after feb29 */ +constexpr int64_t SECONDS_UNTIL2000_MARCH_FIRST = + (946684800LL + SECONDS_PER_DAY * (31 + 29)); +constexpr int WEEK_DAY_OF2000_MARCH_FIRST = 3; + +constexpr int DAYS_PER400_YEARS = + (DAYS_PER_NON_LEAP_YEAR * 400) + (400 / 4) - 3; +constexpr int DAYS_PER100_YEARS = + (DAYS_PER_NON_LEAP_YEAR * 100) + (100 / 4) - 1; +constexpr int DAYS_PER4_YEARS = (DAYS_PER_NON_LEAP_YEAR * 4) + 1; + +// The latest time that can be represented in this form is 03:14:07 UTC on +// Tuesday, 19 January 2038 (corresponding to 2,147,483,647 seconds since the +// start of the epoch). This means that systems using a 32-bit time_t type are +// susceptible to the Year 2038 problem. +constexpr int END_OF32_BIT_EPOCH_YEAR = 2038; + +constexpr time_t OUT_OF_RANGE_RETURN_VALUE = -1; + +constexpr cpp::array<cpp::string_view, DAYS_PER_WEEK> WEEK_DAY_NAMES = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; + +constexpr cpp::array<cpp::string_view, DAYS_PER_WEEK> WEEK_DAY_FULL_NAMES = { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"}; + +constexpr cpp::array<cpp::string_view, MONTHS_PER_YEAR> MONTH_NAMES = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + +constexpr cpp::array<cpp::string_view, MONTHS_PER_YEAR> MONTH_FULL_NAMES = { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"}; + +constexpr int NON_LEAP_YEAR_DAYS_IN_MONTH[] = {31, 28, 31, 30, 31, 30, + 31, 31, 30, 31, 30, 31}; + +} // namespace time_constants +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_TIME_TIME_CONSTANTS_H diff --git a/libc/src/time/time_utils.cpp b/libc/src/time/time_utils.cpp index 509cad8..abc93b8 100644 --- a/libc/src/time/time_utils.cpp +++ b/libc/src/time/time_utils.cpp @@ -10,12 +10,11 @@ #include "src/__support/CPP/limits.h" // INT_MIN, INT_MAX #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/time/time_constants.h" namespace LIBC_NAMESPACE_DECL { namespace time_utils { -using LIBC_NAMESPACE::time_utils::TimeConstants; - static int64_t computeRemainingYears(int64_t daysPerYears, int64_t quotientYears, int64_t *remainingDays) { @@ -52,36 +51,36 @@ int64_t update_from_seconds(int64_t total_seconds, struct tm *tm) { (sizeof(time_t) == 4) ? INT_MIN : INT_MIN * static_cast<int64_t>( - TimeConstants::NUMBER_OF_SECONDS_IN_LEAP_YEAR); + time_constants::NUMBER_OF_SECONDS_IN_LEAP_YEAR); constexpr time_t time_max = (sizeof(time_t) == 4) ? INT_MAX : INT_MAX * static_cast<int64_t>( - TimeConstants::NUMBER_OF_SECONDS_IN_LEAP_YEAR); + time_constants::NUMBER_OF_SECONDS_IN_LEAP_YEAR); time_t ts = static_cast<time_t>(total_seconds); if (ts < time_min || ts > time_max) return time_utils::out_of_range(); int64_t seconds = - total_seconds - TimeConstants::SECONDS_UNTIL2000_MARCH_FIRST; - int64_t days = seconds / TimeConstants::SECONDS_PER_DAY; - int64_t remainingSeconds = seconds % TimeConstants::SECONDS_PER_DAY; + total_seconds - time_constants::SECONDS_UNTIL2000_MARCH_FIRST; + int64_t days = seconds / time_constants::SECONDS_PER_DAY; + int64_t remainingSeconds = seconds % time_constants::SECONDS_PER_DAY; if (remainingSeconds < 0) { - remainingSeconds += TimeConstants::SECONDS_PER_DAY; + remainingSeconds += time_constants::SECONDS_PER_DAY; days--; } - int64_t wday = (TimeConstants::WEEK_DAY_OF2000_MARCH_FIRST + days) % - TimeConstants::DAYS_PER_WEEK; + int64_t wday = (time_constants::WEEK_DAY_OF2000_MARCH_FIRST + days) % + time_constants::DAYS_PER_WEEK; if (wday < 0) - wday += TimeConstants::DAYS_PER_WEEK; + wday += time_constants::DAYS_PER_WEEK; // Compute the number of 400 year cycles. - int64_t numOfFourHundredYearCycles = days / TimeConstants::DAYS_PER400_YEARS; - int64_t remainingDays = days % TimeConstants::DAYS_PER400_YEARS; + int64_t numOfFourHundredYearCycles = days / time_constants::DAYS_PER400_YEARS; + int64_t remainingDays = days % time_constants::DAYS_PER400_YEARS; if (remainingDays < 0) { - remainingDays += TimeConstants::DAYS_PER400_YEARS; + remainingDays += time_constants::DAYS_PER400_YEARS; numOfFourHundredYearCycles--; } @@ -89,17 +88,17 @@ int64_t update_from_seconds(int64_t total_seconds, struct tm *tm) { // "four hundred year cycles" will be 4 hundred year cycles or less in 400 // years. int64_t numOfHundredYearCycles = computeRemainingYears( - TimeConstants::DAYS_PER100_YEARS, 4, &remainingDays); + time_constants::DAYS_PER100_YEARS, 4, &remainingDays); // The remaining number of years after computing the number of // "hundred year cycles" will be 25 four year cycles or less in 100 years. - int64_t numOfFourYearCycles = - computeRemainingYears(TimeConstants::DAYS_PER4_YEARS, 25, &remainingDays); + int64_t numOfFourYearCycles = computeRemainingYears( + time_constants::DAYS_PER4_YEARS, 25, &remainingDays); // The remaining number of years after computing the number of // "four year cycles" will be 4 one year cycles or less in 4 years. int64_t remainingYears = computeRemainingYears( - TimeConstants::DAYS_PER_NON_LEAP_YEAR, 4, &remainingDays); + time_constants::DAYS_PER_NON_LEAP_YEAR, 4, &remainingDays); // Calculate number of years from year 2000. int64_t years = remainingYears + 4 * numOfFourYearCycles + @@ -112,8 +111,8 @@ int64_t update_from_seconds(int64_t total_seconds, struct tm *tm) { // We add 31 and 28 for the number of days in January and February, since our // starting point was March 1st. int64_t yday = remainingDays + 31 + 28 + leapDay; - if (yday >= TimeConstants::DAYS_PER_NON_LEAP_YEAR + leapDay) - yday -= TimeConstants::DAYS_PER_NON_LEAP_YEAR + leapDay; + if (yday >= time_constants::DAYS_PER_NON_LEAP_YEAR + leapDay) + yday -= time_constants::DAYS_PER_NON_LEAP_YEAR + leapDay; int64_t months = 0; while (daysInMonth[months] <= remainingDays) { @@ -121,8 +120,8 @@ int64_t update_from_seconds(int64_t total_seconds, struct tm *tm) { months++; } - if (months >= TimeConstants::MONTHS_PER_YEAR - 2) { - months -= TimeConstants::MONTHS_PER_YEAR; + if (months >= time_constants::MONTHS_PER_YEAR - 2) { + months -= time_constants::MONTHS_PER_YEAR; years++; } @@ -131,19 +130,19 @@ int64_t update_from_seconds(int64_t total_seconds, struct tm *tm) { // All the data (years, month and remaining days) was calculated from // March, 2000. Thus adjust the data to be from January, 1900. - tm->tm_year = static_cast<int>(years + 2000 - TimeConstants::TIME_YEAR_BASE); + tm->tm_year = static_cast<int>(years + 2000 - time_constants::TIME_YEAR_BASE); tm->tm_mon = static_cast<int>(months + 2); tm->tm_mday = static_cast<int>(remainingDays + 1); tm->tm_wday = static_cast<int>(wday); tm->tm_yday = static_cast<int>(yday); tm->tm_hour = - static_cast<int>(remainingSeconds / TimeConstants::SECONDS_PER_HOUR); + static_cast<int>(remainingSeconds / time_constants::SECONDS_PER_HOUR); tm->tm_min = - static_cast<int>(remainingSeconds / TimeConstants::SECONDS_PER_MIN % - TimeConstants::SECONDS_PER_MIN); + static_cast<int>(remainingSeconds / time_constants::SECONDS_PER_MIN % + time_constants::SECONDS_PER_MIN); tm->tm_sec = - static_cast<int>(remainingSeconds % TimeConstants::SECONDS_PER_MIN); + static_cast<int>(remainingSeconds % time_constants::SECONDS_PER_MIN); // TODO(rtenneti): Need to handle timezone and update of tm_isdst. tm->tm_isdst = 0; diff --git a/libc/src/time/time_utils.h b/libc/src/time/time_utils.h index 552ea92..5e0a692 100644 --- a/libc/src/time/time_utils.h +++ b/libc/src/time/time_utils.h @@ -9,79 +9,19 @@ #ifndef LLVM_LIBC_SRC_TIME_TIME_UTILS_H #define LLVM_LIBC_SRC_TIME_TIME_UTILS_H -#include <stddef.h> // For size_t. - +#include "hdr/types/size_t.h" +#include "hdr/types/struct_tm.h" +#include "hdr/types/time_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include "src/time/mktime.h" +#include "time_constants.h" #include <stdint.h> namespace LIBC_NAMESPACE_DECL { namespace time_utils { -enum Month : int { - JANUARY, - FEBRUARY, - MARCH, - APRIL, - MAY, - JUNE, - JULY, - AUGUST, - SEPTEMBER, - OCTOBER, - NOVEMBER, - DECEMBER -}; - -struct TimeConstants { - static constexpr int SECONDS_PER_MIN = 60; - static constexpr int MINUTES_PER_HOUR = 60; - static constexpr int HOURS_PER_DAY = 24; - static constexpr int DAYS_PER_WEEK = 7; - static constexpr int MONTHS_PER_YEAR = 12; - static constexpr int DAYS_PER_NON_LEAP_YEAR = 365; - static constexpr int DAYS_PER_LEAP_YEAR = 366; - - static constexpr int SECONDS_PER_HOUR = SECONDS_PER_MIN * MINUTES_PER_HOUR; - static constexpr int SECONDS_PER_DAY = SECONDS_PER_HOUR * HOURS_PER_DAY; - static constexpr int NUMBER_OF_SECONDS_IN_LEAP_YEAR = - DAYS_PER_LEAP_YEAR * SECONDS_PER_DAY; - - static constexpr int TIME_YEAR_BASE = 1900; - static constexpr int EPOCH_YEAR = 1970; - static constexpr int EPOCH_WEEK_DAY = 4; - - // For asctime the behavior is undefined if struct tm's tm_wday or tm_mon are - // not within the normal ranges as defined in <time.h>, or if struct tm's - // tm_year exceeds {INT_MAX}-1990, or if the below asctime_internal algorithm - // would attempt to generate more than 26 bytes of output (including the - // terminating null). - static constexpr int ASCTIME_BUFFER_SIZE = 256; - static constexpr int ASCTIME_MAX_BYTES = 26; - - /* 2000-03-01 (mod 400 year, immediately after feb29 */ - static constexpr int64_t SECONDS_UNTIL2000_MARCH_FIRST = - (946684800LL + SECONDS_PER_DAY * (31 + 29)); - static constexpr int WEEK_DAY_OF2000_MARCH_FIRST = 3; - - static constexpr int DAYS_PER400_YEARS = - (DAYS_PER_NON_LEAP_YEAR * 400) + (400 / 4) - 3; - static constexpr int DAYS_PER100_YEARS = - (DAYS_PER_NON_LEAP_YEAR * 100) + (100 / 4) - 1; - static constexpr int DAYS_PER4_YEARS = (DAYS_PER_NON_LEAP_YEAR * 4) + 1; - - // The latest time that can be represented in this form is 03:14:07 UTC on - // Tuesday, 19 January 2038 (corresponding to 2,147,483,647 seconds since the - // start of the epoch). This means that systems using a 32-bit time_t type are - // susceptible to the Year 2038 problem. - static constexpr int END_OF32_BIT_EPOCH_YEAR = 2038; - - static constexpr time_t OUT_OF_RANGE_RETURN_VALUE = -1; -}; - // Update the "tm" structure's year, month, etc. members from seconds. // "total_seconds" is the number of seconds since January 1st, 1970. extern int64_t update_from_seconds(int64_t total_seconds, struct tm *tm); @@ -98,7 +38,7 @@ LIBC_INLINE time_t out_of_range() { // require it. libc_errno = EOVERFLOW; #endif - return TimeConstants::OUT_OF_RANGE_RETURN_VALUE; + return time_constants::OUT_OF_RANGE_RETURN_VALUE; } LIBC_INLINE void invalid_value() { libc_errno = EINVAL; } @@ -110,32 +50,23 @@ LIBC_INLINE char *asctime(const struct tm *timeptr, char *buffer, return nullptr; } if (timeptr->tm_wday < 0 || - timeptr->tm_wday > (TimeConstants::DAYS_PER_WEEK - 1)) { + timeptr->tm_wday > (time_constants::DAYS_PER_WEEK - 1)) { invalid_value(); return nullptr; } if (timeptr->tm_mon < 0 || - timeptr->tm_mon > (TimeConstants::MONTHS_PER_YEAR - 1)) { + timeptr->tm_mon > (time_constants::MONTHS_PER_YEAR - 1)) { invalid_value(); return nullptr; } - // TODO(rtenneti): i18n the following strings. - static const char *week_days_name[TimeConstants::DAYS_PER_WEEK] = { - "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; - - static const char *months_name[TimeConstants::MONTHS_PER_YEAR] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; - - // TODO(michaelr): look into removing this call to __builtin_snprintf that may - // be emitted as a call to snprintf. Alternatively, look into using our - // internal printf machinery. + // TODO(michaelr): move this to use the strftime machinery int written_size = __builtin_snprintf( buffer, bufferLength, "%.3s %.3s%3d %.2d:%.2d:%.2d %d\n", - week_days_name[timeptr->tm_wday], months_name[timeptr->tm_mon], - timeptr->tm_mday, timeptr->tm_hour, timeptr->tm_min, timeptr->tm_sec, - TimeConstants::TIME_YEAR_BASE + timeptr->tm_year); + time_constants::WEEK_DAY_NAMES[timeptr->tm_wday].data(), + time_constants::MONTH_NAMES[timeptr->tm_mon].data(), timeptr->tm_mday, + timeptr->tm_hour, timeptr->tm_min, timeptr->tm_sec, + time_constants::TIME_YEAR_BASE + timeptr->tm_year); if (written_size < 0) return nullptr; if (static_cast<size_t>(written_size) >= bufferLength) { diff --git a/libc/src/unistd/linux/dup2.cpp b/libc/src/unistd/linux/dup2.cpp index c7c7c1a..7ffc151 100644 --- a/libc/src/unistd/linux/dup2.cpp +++ b/libc/src/unistd/linux/dup2.cpp @@ -32,7 +32,6 @@ LLVM_LIBC_FUNCTION(int, dup2, (int oldfd, int newfd)) { int ret = LIBC_NAMESPACE::syscall_impl<int>(SYS_fcntl, oldfd, F_GETFD); #elif defined(SYS_fcntl64) // Same as fcntl but can handle large offsets - static_assert(sizeof(off_t) == 8); int ret = LIBC_NAMESPACE::syscall_impl<int>(SYS_fcntl64, oldfd, F_GETFD); #else #error "SYS_fcntl and SYS_fcntl64 syscalls not available." |