aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--NEWS8
-rw-r--r--Rules17
-rw-r--r--elf/dl-tunables.list3
-rw-r--r--elf/tst-rtld-list-tunables.exp2
-rw-r--r--malloc/Makefile8
-rw-r--r--malloc/arena.c4
-rw-r--r--malloc/malloc.c31
-rw-r--r--manual/tunables.texi7
-rw-r--r--sysdeps/generic/malloc-hugepages.c8
-rw-r--r--sysdeps/generic/malloc-hugepages.h7
-rw-r--r--sysdeps/unix/sysv/linux/malloc-hugepages.c127
11 files changed, 207 insertions, 15 deletions
diff --git a/NEWS b/NEWS
index 3b94dd2..c7200cd 100644
--- a/NEWS
+++ b/NEWS
@@ -93,9 +93,11 @@ Major new features:
configuration.
* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
- make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
- Setting this might improve performance with Transparent Huge Pages madvise
- mode depending of the workload.
+ either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk
+ calls or use huge pages directly with mmap calls with the MAP_HUGETLB
+ flag. The former can improve performance when Transparent Huge Pages
+ is set to 'madvise' mode while the latter uses the system reserved
+ huge pages.
Deprecated and removed features, and other changes affecting compatibility:
diff --git a/Rules b/Rules
index 5f5d9ba..be34982 100644
--- a/Rules
+++ b/Rules
@@ -158,6 +158,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
$(tests-mcheck:%=$(objpfx)%-mcheck.out) \
$(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
$(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
+ $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \
$(tests-special) $(tests-printers-out)
xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
endif
@@ -170,6 +171,7 @@ else
tests-expected = $(tests) $(tests-internal) $(tests-printers) \
$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
+ $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \
$(tests-mcheck:%=%-mcheck)
endif
tests:
@@ -199,6 +201,7 @@ endif
binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check)
binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
+binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2)
else
binaries-all-notests =
binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
@@ -211,6 +214,7 @@ binaries-pie-notests =
binaries-mcheck-tests =
binaries-malloc-check-tests =
binaries-malloc-hugetlb1-tests =
+binaries-malloc-hugetlb2-tests =
endif
binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
@@ -259,6 +263,14 @@ $(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o
$(+link-tests)
endif
+# Link every <test>-malloc-hugetlb2 binary from the object file of the
+# corresponding base test (static pattern rule %-malloc-hugetlb2: %.o).
+# The rule is only emitted when the list of such binaries is non-empty.
+ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" ""
+$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \
+ $(link-extra-libs-tests) \
+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+ $(+link-tests)
+endif
+
ifneq "$(strip $(binaries-pie-tests))" ""
$(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
$(link-extra-libs-tests) \
@@ -302,6 +314,11 @@ $(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
endef
$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
+# All malloc-hugetlb2 tests will be run with
+# GLIBC_TUNABLES=glibc.malloc.hugetlb=2.  The template below appends that
+# setting to the <test>-malloc-hugetlb2-ENV variable of the test passed as
+# $(1); it is evaluated once per entry of tests-malloc-hugetlb2.
+define malloc-hugetlb2-ENVS
+$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2
+endef
+$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t))))
# mcheck tests need the debug DSO to support -lmcheck.
define mcheck-ENVS
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index d1fd3f3..845d521 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -93,9 +93,8 @@ glibc {
security_level: SXID_IGNORE
}
hugetlb {
- type: INT_32
+ type: SIZE_T
minval: 0
- maxval: 1
}
}
cpu {
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index d8e363f..cdfdb56 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -1,7 +1,7 @@
glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
glibc.malloc.check: 0 (min: 0, max: 3)
-glibc.malloc.hugetlb: 0 (min: 0, max: 1)
+glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
diff --git a/malloc/Makefile b/malloc/Makefile
index 0137595..e9a6666 100644
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -78,9 +78,9 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
$(tests-static),$(tests))
-# Run all testes with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the
-# Transparent Huge Pages support. We need exclude some tests that define
-# the ENV vars.
+# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb={1,2} which check
+# the Transparent Huge Pages support (1) or automatic huge page support (2).
+# We need to exclude some tests that define the ENV vars.
tests-exclude-hugetlb1 = \
tst-compathooks-off \
tst-compathooks-on \
@@ -93,6 +93,8 @@ tests-exclude-hugetlb1 = \
tst-mallocstate
tests-malloc-hugetlb1 = \
$(filter-out $(tests-exclude-hugetlb1), $(tests))
+# The hugetlb2 variant reuses the hugetlb1 exclusion list.
+tests-malloc-hugetlb2 = \
+ $(filter-out $(tests-exclude-hugetlb1), $(tests))
# -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
diff --git a/malloc/arena.c b/malloc/arena.c
index cd00c7b..9a6e1af 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -230,7 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
#endif
TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
-TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
+TUNABLE_CALLBACK_FNDECL (set_hugetlb, size_t)
#else
/* Initialization routine. */
#include <string.h>
@@ -331,7 +331,7 @@ ptmalloc_init (void)
TUNABLE_CALLBACK (set_tcache_unsorted_limit));
# endif
TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
- TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
+ TUNABLE_GET (hugetlb, size_t, TUNABLE_CALLBACK (set_hugetlb));
#else
if (__glibc_likely (_environ != NULL))
{
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 6b6ec53..75efdc2 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1883,6 +1883,10 @@ struct malloc_par
#if HAVE_TUNABLES
/* Transparent Large Page support. */
INTERNAL_SIZE_T thp_pagesize;
+ /* A value different than 0 means to align mmap allocation to hp_pagesize
+ and add hp_flags on flags. */
+ INTERNAL_SIZE_T hp_pagesize;
+ int hp_flags;
#endif
/* Memory map support */
@@ -2440,7 +2444,10 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
if (mm == MAP_FAILED)
return mm;
- madvise_thp (mm, size);
+#ifdef MAP_HUGETLB
+ if (!(extra_flags & MAP_HUGETLB))
+ madvise_thp (mm, size);
+#endif
/*
The offset to the start of the mmapped region is stored in the prev_size
@@ -2528,7 +2535,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
|| ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
&& (mp_.n_mmaps < mp_.n_mmaps_max)))
{
- char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
+ char *mm;
+#if HAVE_TUNABLES
+ if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
+ {
+ /* There is no need to issue the THP madvise call if Huge Pages are
+ used directly. */
+ mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
+ if (mm != MAP_FAILED)
+ return mm;
+ }
+#endif
+ mm = sysmalloc_mmap (nb, pagesize, 0, av);
if (mm != MAP_FAILED)
return mm;
tried_mmap = true;
@@ -2609,7 +2627,9 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
}
else if (!tried_mmap)
{
- /* We can at least try to use to mmap memory. */
+ /* We can at least try to use mmap to obtain memory. If new_heap fails
+ it is unlikely that trying to allocate huge pages will
+ succeed. */
char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
if (mm != MAP_FAILED)
return mm;
@@ -5383,7 +5403,7 @@ do_set_mxfast (size_t value)
#if HAVE_TUNABLES
static __always_inline int
-do_set_hugetlb (int32_t value)
+do_set_hugetlb (size_t value)
{
if (value == 1)
{
@@ -5395,6 +5415,9 @@ do_set_hugetlb (int32_t value)
if (thp_mode == malloc_thp_mode_madvise)
mp_.thp_pagesize = __malloc_default_thp_pagesize ();
}
+ else if (value >= 2)
+ __malloc_hugepage_config (value == 2 ? 0 : value, &mp_.hp_pagesize,
+ &mp_.hp_flags);
return 0;
}
#endif
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 9ca6e3f..58a47b2 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -278,6 +278,13 @@ default value is @code{0}, which disables any additional support on
Setting its value to @code{1} enables the use of @code{madvise} with
@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled
only if the system supports Transparent Huge Page (currently only on Linux).
+
+Setting its value to @code{2} enables the use of huge pages directly with
+@code{mmap} through the @code{MAP_HUGETLB} flag. The huge page size
+to use will be the default one provided by the system. A value larger than
+@code{2} specifies the huge page size, which will be matched against the
+sizes supported by the system. If the provided value is invalid,
+@code{MAP_HUGETLB} will not be used.
@end deftp
@node Dynamic Linking Tunables
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
index 8fb459a..946284a 100644
--- a/sysdeps/generic/malloc-hugepages.c
+++ b/sysdeps/generic/malloc-hugepages.c
@@ -29,3 +29,11 @@ __malloc_thp_mode (void)
{
return malloc_thp_mode_not_supported;
}
+
+/* Generic version of the huge page configuration query: REQUESTED is
+   ignored and the function always reports no huge page size
+   (*PAGESIZE = 0) and no extra mmap flags (*FLAGS = 0).  */
+void
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+{
+ *pagesize = 0;
+ *flags = 0;
+}
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
index f5a442e..75cda37 100644
--- a/sysdeps/generic/malloc-hugepages.h
+++ b/sysdeps/generic/malloc-hugepages.h
@@ -34,4 +34,11 @@ enum malloc_thp_mode_t
enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
+/* Return the supported huge page size from the REQUESTED sizes on PAGESIZE
+ along with the required extra mmap flags on FLAGS.  Requesting the value
+ of 0 returns the default huge page size, otherwise the value will be
+ matched against the sizes supported by the system. */
+void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+ attribute_hidden;
+
#endif /* _MALLOC_HUGEPAGES_H */
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
index 7497e07..0e05291 100644
--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
@@ -17,8 +17,10 @@
not, see <https://www.gnu.org/licenses/>. */
#include <intprops.h>
+#include <dirent.h>
#include <malloc-hugepages.h>
#include <not-cancel.h>
+#include <sys/mman.h>
unsigned long int
__malloc_default_thp_pagesize (void)
@@ -72,3 +74,128 @@ __malloc_thp_mode (void)
}
return malloc_thp_mode_not_supported;
}
+
+/* Return the default huge page size in bytes, parsed from the
+   "Hugepagesize:" entry of /proc/meminfo.  Return 0 if the file cannot
+   be opened or the entry is not found.  */
+static size_t
+malloc_default_hugepage_size (void)
+{
+  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
+  if (fd == -1)
+    return 0;
+
+  size_t hpsize = 0;
+
+  /* The file is scanned in chunks of at most sizeof (buf) - 1 bytes so
+     that the chunk can always be NUL-terminated.  */
+  char buf[512];
+  off64_t off = 0;
+  while (1)
+    {
+      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
+      /* Stop on a read error or at end of file.  */
+      if (r <= 0)
+        break;
+      buf[r] = '\0';
+
+      /* If the tag is not found, restart the read at the beginning of
+         the last (possibly incomplete) line of this chunk.  */
+      const char *s = strstr (buf, "Hugepagesize:");
+      if (s == NULL)
+        {
+          char *nl = strrchr (buf, '\n');
+          if (nl == NULL)
+            break;
+          off += (nl + 1) - buf;
+          continue;
+        }
+
+      /* The tag was found but its line is cut off by the end of the
+         chunk, so the number might be incomplete.  Read again starting
+         at the tag itself.  When the tag is already at the start of the
+         chunk there is nothing more to gain, so parse what is there.  */
+      if (s != buf && strchr (s, '\n') == NULL)
+        {
+          off += s - buf;
+          continue;
+        }
+
+      /* The default huge page size is in the form:
+           Hugepagesize:       NUMBER kB
+         Skip only the tag; the loop below ignores the padding spaces.
+         Skipping one extra byte could step over the terminating NUL
+         when the tag ends the data read.  */
+      s += sizeof ("Hugepagesize:") - 1;
+      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
+        {
+          if (s[i] == ' ')
+            continue;
+          hpsize *= 10;
+          hpsize += s[i] - '0';
+        }
+      /* The value is expressed in kB; convert it to bytes.  */
+      hpsize *= 1024;
+      break;
+    }
+
+  __close_nocancel (fd);
+
+  return hpsize;
+}
+
+/* Build the extra mmap flags for a huge page of PAGESIZE bytes:
+   MAP_HUGETLB combined with the number of trailing zero bits of
+   PAGESIZE shifted left by MAP_HUGE_SHIFT.  */
+static inline int
+hugepage_flags (size_t pagesize)
+{
+  int size_shift = __builtin_ctzll (pagesize);
+  return (size_shift << MAP_HUGE_SHIFT) | MAP_HUGETLB;
+}
+
+/* Linux version of the huge page configuration query.  On return
+   *PAGESIZE holds the huge page size to use and *FLAGS the matching
+   extra mmap flags; both stay 0 when nothing suitable is found.
+   REQUESTED == 0 selects the default size from
+   malloc_default_hugepage_size; any other value is looked up among the
+   hugepages-<size>kB entries of /sys/kernel/mm/hugepages.  */
+void
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+{
+ *pagesize = 0;
+ *flags = 0;
+
+ if (requested == 0)
+ {
+ *pagesize = malloc_default_hugepage_size ();
+ if (*pagesize != 0)
+ *flags = hugepage_flags (*pagesize);
+ return;
+ }
+
+ /* Each entry represents a supported huge page in the form of:
+ hugepages-<size>kB. */
+ int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
+ O_RDONLY | O_DIRECTORY, 0);
+ if (dirfd == -1)
+ return;
+
+ /* Directory entries are read in raw form, one buffer at a time. */
+ char buffer[1024];
+ while (true)
+ {
+#if !IS_IN(libc)
+# define __getdents64 getdents64
+#endif
+ ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
+ /* Both an error (-1) and a return of 0 end the scan. */
+ if (ret == -1)
+ break;
+ else if (ret == 0)
+ break;
+
+ bool found = false;
+ char *begin = buffer, *end = buffer + ret;
+ while (begin != end)
+ {
+ /* The record length is copied out with memcpy instead of being read
+ through a struct dirent64 pointer (presumably to avoid a
+ misaligned access - TODO confirm). BEGIN is advanced to the next
+ record before the name is examined, so every 'continue' below
+ moves on to the next entry. */
+ unsigned short int d_reclen;
+ memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
+ sizeof (d_reclen));
+ const char *dname = begin + offsetof (struct dirent64, d_name);
+ begin += d_reclen;
+
+ /* Ignore names starting with '.' and names without the
+ "hugepages-" prefix. */
+ if (dname[0] == '.'
+ || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
+ continue;
+
+ /* Parse the decimal number following the prefix. */
+ size_t hpsize = 0;
+ const char *sizestr = dname + sizeof ("hugepages-") - 1;
+ for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
+ {
+ hpsize *= 10;
+ hpsize += sizestr[i] - '0';
+ }
+ /* The entry name gives the size in kB; scale it by 1024 before
+ comparing it with REQUESTED. */
+ hpsize *= 1024;
+
+ if (hpsize == requested)
+ {
+ *pagesize = hpsize;
+ *flags = hugepage_flags (*pagesize);
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ break;
+ }
+
+ __close_nocancel (dirfd);
+}