1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
//===-- Data structures for sorting routines --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
#define LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
#include "hdr/stdint_proxy.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/macros/config.h"
#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
class ArrayGenericSize {
cpp::byte *array_base;
size_t array_len;
size_t elem_size;
LIBC_INLINE cpp::byte *get_internal(size_t i) const {
return array_base + (i * elem_size);
}
public:
LIBC_INLINE ArrayGenericSize(void *a, size_t s, size_t e)
: array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s),
elem_size(e) {}
static constexpr bool has_fixed_size() { return false; }
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
LIBC_INLINE void swap(size_t i, size_t j) const {
// It's possible to use 8 byte blocks with `uint64_t`, but that
// generates more machine code as the remainder loop gets
// unrolled, plus 4 byte operations are more likely to be
// efficient on a wider variety of hardware. On x86 LLVM tends
// to unroll the block loop again into 2 16 byte swaps per
// iteration which is another reason that 4 byte blocks yields
// good performance even for big types.
using block_t = uint32_t;
constexpr size_t BLOCK_SIZE = sizeof(block_t);
alignas(block_t) cpp::byte tmp_block[BLOCK_SIZE];
cpp::byte *elem_i = get_internal(i);
cpp::byte *elem_j = get_internal(j);
const size_t elem_size_rem = elem_size % BLOCK_SIZE;
const cpp::byte *elem_i_block_end = elem_i + (elem_size - elem_size_rem);
while (elem_i != elem_i_block_end) {
inline_memcpy(tmp_block, elem_i, BLOCK_SIZE);
inline_memcpy(elem_i, elem_j, BLOCK_SIZE);
inline_memcpy(elem_j, tmp_block, BLOCK_SIZE);
elem_i += BLOCK_SIZE;
elem_j += BLOCK_SIZE;
}
for (size_t n = 0; n < elem_size_rem; ++n) {
cpp::byte tmp = elem_i[n];
elem_i[n] = elem_j[n];
elem_j[n] = tmp;
}
}
LIBC_INLINE size_t len() const { return array_len; }
// Make an Array starting at index |i| and length |s|.
LIBC_INLINE ArrayGenericSize make_array(size_t i, size_t s) const {
return ArrayGenericSize(get_internal(i), s, elem_size);
}
// Reset this Array to point at a different interval of the same
// items starting at index |i|.
LIBC_INLINE void reset_bounds(size_t i, size_t s) {
array_base = get_internal(i);
array_len = s;
}
};
// Having a specialized Array type for sorting that knows at
// compile-time what the size of the element is, allows for much more
// efficient swapping and for cheaper offset calculations.
template <size_t ELEM_SIZE> class ArrayFixedSize {
cpp::byte *array_base;
size_t array_len;
LIBC_INLINE cpp::byte *get_internal(size_t i) const {
return array_base + (i * ELEM_SIZE);
}
public:
LIBC_INLINE ArrayFixedSize(void *a, size_t s)
: array_base(reinterpret_cast<cpp::byte *>(a)), array_len(s) {}
// Beware this function is used a heuristic for cheap to swap types, so
// instantiating `ArrayFixedSize` with `ELEM_SIZE > 100` is probably a bad
// idea perf wise.
static constexpr bool has_fixed_size() { return true; }
LIBC_INLINE void *get(size_t i) const { return get_internal(i); }
LIBC_INLINE void swap(size_t i, size_t j) const {
alignas(32) cpp::byte tmp[ELEM_SIZE];
cpp::byte *elem_i = get_internal(i);
cpp::byte *elem_j = get_internal(j);
inline_memcpy(tmp, elem_i, ELEM_SIZE);
__builtin_memmove(elem_i, elem_j, ELEM_SIZE);
inline_memcpy(elem_j, tmp, ELEM_SIZE);
}
LIBC_INLINE size_t len() const { return array_len; }
// Make an Array starting at index |i| and length |s|.
LIBC_INLINE ArrayFixedSize<ELEM_SIZE> make_array(size_t i, size_t s) const {
return ArrayFixedSize<ELEM_SIZE>(get_internal(i), s);
}
// Reset this Array to point at a different interval of the same
// items starting at index |i|.
LIBC_INLINE void reset_bounds(size_t i, size_t s) {
array_base = get_internal(i);
array_len = s;
}
};
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_STDLIB_QSORT_DATA_H
|