diff options
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | Rules | 19 | ||||
-rw-r--r-- | elf/dl-tunables.list | 5 | ||||
-rw-r--r-- | elf/tst-rtld-list-tunables.exp | 1 | ||||
-rw-r--r-- | malloc/Makefile | 16 | ||||
-rw-r--r-- | malloc/arena.c | 5 | ||||
-rw-r--r-- | malloc/malloc-internal.h | 1 | ||||
-rw-r--r-- | malloc/malloc.c | 47 | ||||
-rw-r--r-- | manual/tunables.texi | 10 | ||||
-rw-r--r-- | sysdeps/generic/Makefile | 8 | ||||
-rw-r--r-- | sysdeps/generic/malloc-hugepages.c | 31 | ||||
-rw-r--r-- | sysdeps/generic/malloc-hugepages.h | 37 | ||||
-rw-r--r-- | sysdeps/unix/sysv/linux/malloc-hugepages.c | 74 |
13 files changed, 259 insertions, 0 deletions
@@ -92,6 +92,11 @@ Major new features: --enable-static-pie, which no longer has any effect on the build configuration. +* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to + make malloc issue madvise with MADV_HUGEPAGE on mmap and sbrk calls. + Setting this might improve performance with Transparent Huge Pages madvise + mode depending on the workload. + Deprecated and removed features, and other changes affecting compatibility: * On x86-64, the LD_PREFER_MAP_32BIT_EXEC environment variable support @@ -157,6 +157,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ $(tests-container:%=$(objpfx)%.out) \ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \ + $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \ $(tests-special) $(tests-printers-out) xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) endif @@ -168,6 +169,7 @@ tests-expected = else tests-expected = $(tests) $(tests-internal) $(tests-printers) \ $(tests-container) $(tests-malloc-check:%=%-malloc-check) \ + $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \ $(tests-mcheck:%=%-mcheck) endif tests: @@ -196,6 +198,7 @@ binaries-pie-notests = endif binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check) +binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) else binaries-all-notests = binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) @@ -207,6 +210,7 @@ binaries-pie-tests = binaries-pie-notests = binaries-mcheck-tests = binaries-malloc-check-tests = +binaries-malloc-hugetlb1-tests = endif binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) @@ -247,6 +251,14 @@ $(addprefix $(objpfx),$(binaries-malloc-check-tests)): %-malloc-check: %.o \ $(+link-tests) endif +ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" "" +$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \ + 
$(link-extra-libs-tests) \ + $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ + $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) + $(+link-tests) +endif + ifneq "$(strip $(binaries-pie-tests))" "" $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ $(link-extra-libs-tests) \ @@ -284,6 +296,13 @@ $(1)-malloc-check-ENV = MALLOC_CHECK_=3 \ endef $(foreach t,$(tests-malloc-check),$(eval $(call malloc-check-ENVS,$(t)))) +# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 +define malloc-hugetlb1-ENVS +$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1 +endef +$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t)))) + + # mcheck tests need the debug DSO to support -lmcheck. define mcheck-ENVS $(1)-mcheck-ENV = LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list index ffcd7f1..d1fd3f3 100644 --- a/elf/dl-tunables.list +++ b/elf/dl-tunables.list @@ -92,6 +92,11 @@ glibc { minval: 0 security_level: SXID_IGNORE } + hugetlb { + type: INT_32 + minval: 0 + maxval: 1 + } } cpu { hwcap_mask { diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp index 44e4834..d8e363f 100644 --- a/elf/tst-rtld-list-tunables.exp +++ b/elf/tst-rtld-list-tunables.exp @@ -1,6 +1,7 @@ glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+) glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+) glibc.malloc.check: 0 (min: 0, max: 3) +glibc.malloc.hugetlb: 0 (min: 0, max: 1) glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647) glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+) glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+) diff --git a/malloc/Makefile b/malloc/Makefile index 63cd7c0..0137595 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -78,6 +78,22 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \ tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \ $(tests-static),$(tests)) +# Run all tests 
with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the +# Transparent Huge Pages support. We need to exclude some tests that define +# the ENV vars. +tests-exclude-hugetlb1 = \ + tst-compathooks-off \ + tst-compathooks-on \ + tst-interpose-nothread \ + tst-interpose-thread \ + tst-interpose-static-nothread \ + tst-interpose-static-thread \ + tst-malloc-usable \ + tst-malloc-usable-tunables \ + tst-mallocstate +tests-malloc-hugetlb1 = \ + $(filter-out $(tests-exclude-hugetlb1), $(tests)) + # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24. ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) # Tests that don't play well with mcheck. They are either bugs in mcheck or diff --git a/malloc/arena.c b/malloc/arena.c index 78ef4cf..cd00c7b 100644 --- a/malloc/arena.c +++ b/malloc/arena.c @@ -230,6 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t) TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t) #endif TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t) +TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t) #else /* Initialization routine. */ #include <string.h> @@ -330,6 +331,7 @@ ptmalloc_init (void) TUNABLE_CALLBACK (set_tcache_unsorted_limit)); # endif TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast)); + TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb)); #else if (__glibc_likely (_environ != NULL)) { @@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad) __munmap (p2, HEAP_MAX_SIZE); return 0; } + + madvise_thp (p2, size); + h = (heap_info *) p2; h->size = size; h->mprotect_size = size; diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h index 0c7b5a1..7493e34 100644 --- a/malloc/malloc-internal.h +++ b/malloc/malloc-internal.h @@ -22,6 +22,7 @@ #include <malloc-machine.h> #include <malloc-sysdep.h> #include <malloc-size.h> +#include <malloc-hugepages.h> /* Called in the parent process before a fork. 
*/ void __malloc_fork_lock_parent (void) attribute_hidden; diff --git a/malloc/malloc.c b/malloc/malloc.c index 095d97a..c75841b 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -1880,6 +1880,11 @@ struct malloc_par INTERNAL_SIZE_T arena_test; INTERNAL_SIZE_T arena_max; +#if HAVE_TUNABLES + /* Transparent huge page size; 0 leaves madvise_thp disabled. */ + INTERNAL_SIZE_T thp_pagesize; +#endif + /* Memory map support */ int n_mmaps; int n_mmaps_max; @@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n) #include <stap-probe.h> +/* ----------- Routines dealing with transparent huge pages ----------- */ + +/* Ask the kernel to back the SIZE bytes starting at P with transparent + huge pages. Compiles to nothing unless HAVE_TUNABLES is set and + MADV_HUGEPAGE is defined; the return value of __madvise is ignored. + NOTE(review): P is forwarded unchanged and callers pass sbrk results; + confirm those addresses are page-aligned, which madvise is documented + to require. */ +static inline void +madvise_thp (void *p, INTERNAL_SIZE_T size) +{ +#if HAVE_TUNABLES && defined (MADV_HUGEPAGE) + /* Do not consider areas smaller than a huge page or if the tunable is + not active. */ + if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize) + return; + __madvise (p, size, MADV_HUGEPAGE); +#endif +} + /* ------------------- Support for multiple arenas -------------------- */ #include "arena.c" @@ -2445,6 +2464,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) if (mm != MAP_FAILED) { + madvise_thp (mm, size); + /* The offset to the start of the mmapped region is stored in the prev_size field of the chunk. 
This allows us to adjust @@ -2606,6 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) if (size > 0) { brk = (char *) (MORECORE (size)); + if (brk != (char *) (MORECORE_FAILURE)) + madvise_thp (brk, size); LIBC_PROBE (memory_sbrk_more, 2, brk, size); } @@ -2637,6 +2660,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) if (mbrk != MAP_FAILED) { + madvise_thp (mbrk, size); + /* We do not need, and cannot use, another sbrk call to find end */ brk = mbrk; snd_brk = brk + size; @@ -2748,6 +2773,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) correction = 0; snd_brk = (char *) (MORECORE (0)); } + else + madvise_thp (snd_brk, correction); } /* handle non-contiguous cases */ @@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size) if (cp == MAP_FAILED) return 0; + madvise_thp (cp, new_size); + p = (mchunkptr) (cp + offset); assert (aligned_OK (chunk2mem (p))); @@ -5316,6 +5345,24 @@ do_set_mxfast (size_t value) return 0; } +#if HAVE_TUNABLES +/* Apply the hugetlb setting (presumably reached through the set_hugetlb + tunable callback -- confirm against arena.c). When VALUE is 1 and + __malloc_thp_mode reports malloc_thp_mode_madvise, store the result of + __malloc_default_thp_pagesize in mp_.thp_pagesize; any other VALUE or + mode leaves mp_ untouched. Always returns 0. */ +static __always_inline int +do_set_hugetlb (int32_t value) +{ + if (value == 1) + { + enum malloc_thp_mode_t thp_mode = __malloc_thp_mode (); + /* + Only enable THP madvise usage if system does support it and + has 'madvise' mode. Otherwise the madvise() call is wasteful. 
+ */ + if (thp_mode == malloc_thp_mode_madvise) + mp_.thp_pagesize = __malloc_default_thp_pagesize (); + } + return 0; +} +#endif + int __libc_mallopt (int param_number, int value) { diff --git a/manual/tunables.texi b/manual/tunables.texi index 28ff502..9ca6e3f 100644 --- a/manual/tunables.texi +++ b/manual/tunables.texi @@ -270,6 +270,16 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size passed to @code{malloc} for the largest bin size to enable. @end deftp +@deftp Tunable glibc.malloc.hugetlb +This tunable controls the usage of Huge Pages on @code{malloc} calls. The +default value is @code{0}, which disables any additional support on +@code{malloc}. 
+ +Setting its value to @code{1} enables the use of @code{madvise} with +@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled +only if the system supports Transparent Huge Page (currently only on Linux). +@end deftp + @node Dynamic Linking Tunables @section Dynamic Linking Tunables @cindex dynamic linking tunables diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile index a209e85..8eef83c 100644 --- a/sysdeps/generic/Makefile +++ b/sysdeps/generic/Makefile @@ -27,3 +27,11 @@ sysdep_routines += framestate unwind-pe shared-only-routines += framestate unwind-pe endif endif + +ifeq ($(subdir),malloc) +sysdep_malloc_debug_routines += malloc-hugepages +endif + +ifeq ($(subdir),misc) +sysdep_routines += malloc-hugepages +endif diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c new file mode 100644 index 0000000..8fb459a --- /dev/null +++ b/sysdeps/generic/malloc-hugepages.c @@ -0,0 +1,31 @@ +/* Huge Page support. Generic implementation. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <https://www.gnu.org/licenses/>. 
*/ + +#include <malloc-hugepages.h> + +/* Generic fallback: no huge page size is known, so always return 0. */ +unsigned long int +__malloc_default_thp_pagesize (void) +{ + return 0; +} + +/* Generic fallback: always report malloc_thp_mode_not_supported. */ +enum malloc_thp_mode_t +__malloc_thp_mode (void) +{ + return malloc_thp_mode_not_supported; +} diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h new file mode 100644 index 0000000..f5a442e --- /dev/null +++ b/sysdeps/generic/malloc-hugepages.h @@ -0,0 +1,37 @@ +/* Malloc huge page support. Generic implementation. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <https://www.gnu.org/licenses/>. */ + +#ifndef _MALLOC_HUGEPAGES_H +#define _MALLOC_HUGEPAGES_H + +#include <stddef.h> + +/* Return the default transparent huge page size, or 0 when it cannot be + determined. */ +unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden; + +/* Transparent huge page mode as reported by __malloc_thp_mode. The last + value is what the generic implementation always returns. */ +enum malloc_thp_mode_t +{ + malloc_thp_mode_always, + malloc_thp_mode_madvise, + malloc_thp_mode_never, + malloc_thp_mode_not_supported +}; + +/* Return the transparent huge page mode of the running system. 
*/ +enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden; + +#endif /* _MALLOC_HUGEPAGES_H */ diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c new file mode 100644 index 0000000..7497e07 --- /dev/null +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c @@ -0,0 +1,74 @@ +/* Huge Page support. Linux implementation.
+ Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <https://www.gnu.org/licenses/>. */ + +#include <intprops.h> +#include <malloc-hugepages.h> +#include <not-cancel.h> + +unsigned long int +__malloc_default_thp_pagesize (void) +{ + int fd = __open64_nocancel ( + "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY); + if (fd == -1) + return 0; + + char str[INT_BUFSIZE_BOUND (unsigned long int)]; + ssize_t s = __read_nocancel (fd, str, sizeof (str)); + __close_nocancel (fd); + if (s < 0) + return 0; + + unsigned long int r = 0; + for (ssize_t i = 0; i < s; i++) + { + if (str[i] == '\n') + break; + r *= 10; + r += str[i] - '0'; + } + return r; +} + +enum malloc_thp_mode_t +__malloc_thp_mode (void) +{ + int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled", + O_RDONLY); + if (fd == -1) + return malloc_thp_mode_not_supported; + + static const char mode_always[] = "[always] madvise never\n"; + static const char mode_madvise[] = "always [madvise] never\n"; + static const char mode_never[] = "always madvise [never]\n"; + + char str[sizeof(mode_always)]; + ssize_t s = __read_nocancel (fd, str, sizeof (str)); + __close_nocancel (fd); + + if (s == sizeof (mode_always) - 1) + { + if (strcmp (str, mode_always) == 0) + return malloc_thp_mode_always; + 
else if (strcmp (str, mode_madvise) == 0) + return malloc_thp_mode_madvise; + else if (strcmp (str, mode_never) == 0) + return malloc_thp_mode_never; + } + return malloc_thp_mode_not_supported; +} |