From 23f0d81608d0ca6379894ef81670cf30af7fd081 Mon Sep 17 00:00:00 2001 From: Amrita H S Date: Mon, 6 May 2024 09:01:29 -0500 Subject: powerpc: Optimized strncmp for power10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch is based on __strcmp_power10. Improvements from __strncmp_power9: 1. Uses new POWER10 instructions - This code uses lxvp to decrease contention on load by loading 32 bytes per instruction. 2. Performance implication - This version has around 38% better performance on average. - A minor performance regression is seen for a few small sizes and specific combinations of alignments. Signed-off-by: Amrita H S Reviewed-by: Peter Bergner --- sysdeps/powerpc/powerpc64/multiarch/strncmp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'sysdeps/powerpc/powerpc64/multiarch/strncmp.c') diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c index 6178f4a..a5ed67f 100644 --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c @@ -29,6 +29,7 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden; extern __typeof (strncmp) __strncmp_power8 attribute_hidden; # ifdef __LITTLE_ENDIAN__ extern __typeof (strncmp) __strncmp_power9 attribute_hidden; +extern __typeof (strncmp) __strncmp_power10 attribute_hidden; # endif # undef strncmp @@ -36,6 +37,9 @@ extern __typeof (strncmp) __strncmp_power9 attribute_hidden; ifunc symbol properly. */ libc_ifunc_redirected (__redirect_strncmp, strncmp, # ifdef __LITTLE_ENDIAN__ + (hwcap2 & PPC_FEATURE2_ARCH_3_1 + && hwcap & PPC_FEATURE_HAS_VSX) + ? __strncmp_power10 : (hwcap2 & PPC_FEATURE2_ARCH_3_00 && hwcap & PPC_FEATURE_HAS_ALTIVEC) ? __strncmp_power9 : -- cgit v1.1