diff options
author | H.J. Lu <hjl.tools@gmail.com> | 2024-11-26 16:15:25 +0800 |
---|---|---|
committer | H.J. Lu <hjl.tools@gmail.com> | 2024-12-04 04:28:15 +0800 |
commit | 1c4cebb84b9e33aea9a90adfadb031d1f1eba927 (patch) | |
tree | b8c59eb6388c942da858d8a1881e739c17493d73 /sysdeps | |
parent | f43eb2cf30fdff39bda1c2018246d4badabbc576 (diff) | |
download | glibc-1c4cebb84b9e33aea9a90adfadb031d1f1eba927.zip glibc-1c4cebb84b9e33aea9a90adfadb031d1f1eba927.tar.gz glibc-1c4cebb84b9e33aea9a90adfadb031d1f1eba927.tar.bz2 |
malloc: Optimize small memory clearing for calloc
Add calloc-clear-memory.h to clear blocks of up to 36 bytes (72 bytes
on 64-bit targets) for calloc. Use repeated stores with 1 branch instead
of up to 3 branches. On x86-64, this is faster than memset, since calling
memset needs 1 indirect branch, 1 broadcast, and up to 4 branches.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/generic/calloc-clear-memory.h | 49 |
1 file changed, 49 insertions, 0 deletions
diff --git a/sysdeps/generic/calloc-clear-memory.h b/sysdeps/generic/calloc-clear-memory.h new file mode 100644 index 0000000..1f9d70d --- /dev/null +++ b/sysdeps/generic/calloc-clear-memory.h @@ -0,0 +1,49 @@ +/* Clear a block of memory for calloc. Generic version. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +static __always_inline void * +clear_memory (INTERNAL_SIZE_T *d, unsigned long clearsize) +{ + /* Unroll clear memory size up to 9 * INTERNAL_SIZE_T bytes. We know + that contents have an odd number of INTERNAL_SIZE_T-sized words; + minimally 3 words. */ + unsigned long nclears = clearsize / sizeof (INTERNAL_SIZE_T); + + if (nclears > 9) + return memset (d, 0, clearsize); + + /* NB: The VRP pass in GCC 14.2 will optimize it out. */ + if (nclears < 3) + __builtin_unreachable (); + + /* Use repeated stores with 1 branch, instead of up to 3. */ + *(d + 0) = 0; + *(d + 1) = 0; + *(d + 2) = 0; + *(d + nclears - 2) = 0; + *(d + nclears - 2 + 1) = 0; + if (nclears > 5) + { + *(d + 3) = 0; + *(d + 3 + 1) = 0; + *(d + nclears - 4) = 0; + *(d + nclears - 4 + 1) = 0; + } + + return d; +} |