aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
diff options
context:
space:
mode:
author: Amrita H S <amritahs@linux.ibm.com> 2024-05-06 09:01:29 -0500
committer: Peter Bergner <bergner@linux.ibm.com> 2024-05-06 09:01:29 -0500
commit: 23f0d81608d0ca6379894ef81670cf30af7fd081 (patch)
tree: 2c6ec46bf4d7bfb00c46f215a5735cb49db65e31 /sysdeps/powerpc/powerpc64/multiarch/strncmp.c
parent: ea73eb5f581ef5931fd67005aa0c526ba43366c9 (diff)
download: glibc-23f0d81608d0ca6379894ef81670cf30af7fd081.zip
glibc-23f0d81608d0ca6379894ef81670cf30af7fd081.tar.gz
glibc-23f0d81608d0ca6379894ef81670cf30af7fd081.tar.bz2
powerpc: Optimized strncmp for power10
This patch is based on __strcmp_power10.

Improvements from __strncmp_power9:

1. Uses new POWER10 instructions
   - This code uses lxvp to decrease contention on load by loading
     32 bytes per instruction.

2. Performance implication
   - This version has around 38% better performance on average.
   - Minor performance regression is seen for a few small sizes and
     specific combinations of alignments.

Signed-off-by: Amrita H S <amritahs@linux.ibm.com>
Reviewed-by: Peter Bergner <bergner@linux.ibm.com>
Diffstat (limited to 'sysdeps/powerpc/powerpc64/multiarch/strncmp.c')
-rw-r--r-- sysdeps/powerpc/powerpc64/multiarch/strncmp.c | 4
1 file changed, 4 insertions, 0 deletions
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
index 6178f4a..a5ed67f 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
@@ -29,6 +29,7 @@ extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
# ifdef __LITTLE_ENDIAN__
extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
+extern __typeof (strncmp) __strncmp_power10 attribute_hidden;
# endif
# undef strncmp
@@ -36,6 +37,9 @@ extern __typeof (strncmp) __strncmp_power9 attribute_hidden;
ifunc symbol properly. */
libc_ifunc_redirected (__redirect_strncmp, strncmp,
# ifdef __LITTLE_ENDIAN__
+ (hwcap2 & PPC_FEATURE2_ARCH_3_1
+ && hwcap & PPC_FEATURE_HAS_VSX)
+ ? __strncmp_power10 :
(hwcap2 & PPC_FEATURE2_ARCH_3_00
&& hwcap & PPC_FEATURE_HAS_ALTIVEC)
? __strncmp_power9 :