aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Haochen Gui <guihaoc@gcc.gnu.org> 2023-12-27 10:32:21 +0800
committer: Haochen Gui <guihaoc@gcc.gnu.org> 2023-12-27 10:35:14 +0800
commit: d92d26ff366c5ceb4795d1ad678847c1b3d88e63 (patch)
tree: f1ad49ad5563e37f1c2632036b68e04fe05a12d2 /gcc
parent: daea7777cee46549b8a43624f32ec66b4e783f3f (diff)
downloadgcc-d92d26ff366c5ceb4795d1ad678847c1b3d88e63.zip
gcc-d92d26ff366c5ceb4795d1ad678847c1b3d88e63.tar.gz
gcc-d92d26ff366c5ceb4795d1ad678847c1b3d88e63.tar.bz2
rs6000: Clean up the pre-checkings of expand_block_compare
Remove the P7 CPU test, as only P7 and above can enter this function and P7 LE is excluded by the check of targetm.slow_unaligned_access on word_mode. Also, performance testing shows that expanding the block compare inline is faster than calling the library on P7 BE when the length is from 16 bytes to 64 bytes.

gcc/
	* config/rs6000/rs6000-string.cc (expand_block_compare): Assert that only P7 and above can enter this function. Remove the P7 CPU test and let P7 BE do the expand.

gcc/testsuite/
	* gcc.target/powerpc/block-cmp-4.c: New.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/rs6000/rs6000-string.cc 35
-rw-r--r-- gcc/testsuite/gcc.target/powerpc/block-cmp-4.c 11
2 files changed, 21 insertions, 25 deletions
diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc
index fa80298..133e538 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1947,11 +1947,8 @@ expand_block_compare_gpr(unsigned HOST_WIDE_INT bytes, unsigned int base_align,
bool
expand_block_compare (rtx operands[])
{
- rtx target = operands[0];
- rtx orig_src1 = operands[1];
- rtx orig_src2 = operands[2];
- rtx bytes_rtx = operands[3];
- rtx align_rtx = operands[4];
+ /* TARGET_POPCNTD is already guarded at expand cmpmemsi. */
+ gcc_assert (TARGET_POPCNTD);
/* This case is complicated to handle because the subtract
with carry instructions do not generate the 64-bit
@@ -1960,23 +1957,19 @@ expand_block_compare (rtx operands[])
if (TARGET_32BIT && TARGET_POWERPC64)
return false;
- bool isP7 = (rs6000_tune == PROCESSOR_POWER7);
-
/* Allow this param to shut off all expansion. */
if (rs6000_block_compare_inline_limit == 0)
return false;
- /* targetm.slow_unaligned_access -- don't do unaligned stuff.
- However slow_unaligned_access returns true on P7 even though the
- performance of this code is good there. */
- if (!isP7
- && (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
- || targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2))))
- return false;
+ rtx target = operands[0];
+ rtx orig_src1 = operands[1];
+ rtx orig_src2 = operands[2];
+ rtx bytes_rtx = operands[3];
+ rtx align_rtx = operands[4];
- /* Unaligned l*brx traps on P7 so don't do this. However this should
- not affect much because LE isn't really supported on P7 anyway. */
- if (isP7 && !BYTES_BIG_ENDIAN)
+ /* targetm.slow_unaligned_access -- don't do unaligned stuff. */
+ if (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
+ || targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
return false;
/* If this is not a fixed size compare, try generating loop code and
@@ -2025,14 +2018,6 @@ expand_block_compare (rtx operands[])
if (!IN_RANGE (bytes, 1, max_bytes))
return expand_compare_loop (operands);
- /* The code generated for p7 and older is not faster than glibc
- memcmp if alignment is small and length is not short, so bail
- out to avoid those conditions. */
- if (targetm.slow_unaligned_access (word_mode, align_by_bits)
- && ((base_align == 1 && bytes > 16)
- || (base_align == 2 && bytes > 32)))
- return false;
-
rtx final_label = NULL;
if (use_vec)
diff --git a/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c b/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c
new file mode 100644
index 0000000..c86feba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target be } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power7" } */
+/* { dg-final { scan-assembler-not {\mb[l]? memcmp\M} } } */
+
+/* Test that it does expand for cmpmemsi instead of calling library on
+ P7 BE when length is less than 32 bytes. */
+
+int foo (const char* s1, const char* s2)
+{
+ return __builtin_memcmp (s1, s2, 31);
+}