aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Pinski <andrew.pinski@oss.qualcomm.com>2025-08-28 17:20:21 -0700
committerAndrew Pinski <andrew.pinski@oss.qualcomm.com>2025-08-29 20:13:00 -0700
commitbfa70ddb650ec91c2511d351b2b3c3f78dfad6d4 (patch)
tree62bcc01c69ee67c1815764bc0da006d168ef01d9
parent668e607efe52abbe4612fa85454731d26a71a5be (diff)
downloadgcc-bfa70ddb650ec91c2511d351b2b3c3f78dfad6d4.zip
gcc-bfa70ddb650ec91c2511d351b2b3c3f78dfad6d4.tar.gz
gcc-bfa70ddb650ec91c2511d351b2b3c3f78dfad6d4.tar.bz2
forwprop: Copy the memcmp optimization from strlen to forwprop [PR116651]
To better optimize code dealing with `memcmp == 0` where we have a small constant size, we can inline the memcmp in those cases. There is code to do this in strlen but that is run too late in the case where we can figure out the value of one of the arguments to memcmp. So this copies the optimization to forwprop. An example of where this helps is: ``` bool cmpvect(const std::vector<int> &a) { return a == std::vector<int>{10}; } ``` Where the above should be optimized to just `return a.size() == 1 && a[0] == 10;`. Note the pr44130.c testcase needed to change, as otherwise it would now be optimized away. Note the loop in pr44130.c is also vectorized, which it was not before. Note the optimization remains in strlen as the other part (memcmp -> memcmp_eq) should move to either isel or fab and I didn't want to remove it just yet. Bootstrapped and tested on x86_64-linux-gnu. Changes since v1: * v2: Add verification of arguments to memcmp to simplify_builtin_memcmp. PR tree-optimization/116651 PR tree-optimization/93265 PR tree-optimization/103647 PR tree-optimization/52171 gcc/ChangeLog: * tree-ssa-forwprop.cc (simplify_builtin_memcmp): New function. (simplify_builtin_call): Call simplify_builtin_memcmp for memcmp and memcmp_eq builtins. gcc/testsuite/ChangeLog: * gcc.target/i386/pr44130.c: Add an inline-asm clobber. * g++.dg/tree-ssa/vector-compare-1.C: New test. Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
-rw-r--r--gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C24
-rw-r--r--gcc/testsuite/gcc.target/i386/pr44130.c2
-rw-r--r--gcc/tree-ssa-forwprop.cc71
3 files changed, 97 insertions, 0 deletions
diff --git a/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
new file mode 100644
index 0000000..d9b2bc2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/vector-compare-1.C
@@ -0,0 +1,24 @@
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2 -fdump-tree-optimized" }
+
+// PR tree-optimization/116651
+
+#include <vector>
+
+bool test1(const std::vector<int>& in) {
+ return in == std::vector<int>{24};
+}
+
+/* We should be able to optimize this to:
+ int *b = in.bptr;
+ int *e = in.eptr;
+ auto size = e - b;
+ if (size != 4)
+ return false;
+ int v = *b;
+ return v == 24;
+
+*/
+
+// { dg-final { scan-tree-dump-times "== 24" 1 "optimized" } }
+// { dg-final { scan-tree-dump-times "== 4" 1 "optimized" { target int32 } } }
diff --git a/gcc/testsuite/gcc.target/i386/pr44130.c b/gcc/testsuite/gcc.target/i386/pr44130.c
index 2ad7409..6269dc8 100644
--- a/gcc/testsuite/gcc.target/i386/pr44130.c
+++ b/gcc/testsuite/gcc.target/i386/pr44130.c
@@ -21,6 +21,8 @@ void testf (void)
xxxxx[5] = __builtin_copysignf (-0.0, Yf[5]);
xxxxx[6] = __builtin_copysignf (__builtin_inff (), Yf[6]);
xxxxx[7] = __builtin_copysignf (-__builtin_nanf (""), Yf[7]);
+
+ asm("":"=m"(xxxxx));
for (i = 0; i < 8; ++i)
if (__builtin_memcmp (xxxxx+i, Zf+i, sizeof(float)) != 0)
abort ();
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index e0f25a1..32ce750 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -1569,6 +1569,74 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
return changed;
}
+/* Inline a builtin memcmp whose result is only compared for equality with
+ zero and whose length is a constant power of two no larger than a word.
+ GSI_P is the GSI for the call. STMT is the call. Return true if replaced. */
+
+static bool
+simplify_builtin_memcmp (gimple_stmt_iterator *gsi_p, gcall *stmt)
+{
+ /* Make sure the memcmp call has three arguments of the expected types. */
+ if (gimple_call_num_args (stmt) != 3)
+ return false;
+ tree arg1 = gimple_call_arg (stmt, 0);
+ tree arg2 = gimple_call_arg (stmt, 1);
+ tree len = gimple_call_arg (stmt, 2);
+
+ if (!POINTER_TYPE_P (TREE_TYPE (arg1)))
+ return false;
+ if (!POINTER_TYPE_P (TREE_TYPE (arg2)))
+ return false;
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (len)))
+ return false;
+
+ /* The return value of the memcmp has to be used in an
+ equality comparison to zero. */
+ tree res = gimple_call_lhs (stmt);
+
+ if (!res || !use_in_zero_equality (res))
+ return false;
+
+ unsigned HOST_WIDE_INT leni;
+
+ if (tree_fits_uhwi_p (len)
+ && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
+ && pow2p_hwi (leni))
+ {
+ leni *= CHAR_TYPE_SIZE;
+ unsigned align1 = get_pointer_alignment (arg1);
+ unsigned align2 = get_pointer_alignment (arg2);
+ unsigned align = MIN (align1, align2);
+ scalar_int_mode mode;
+ if (int_mode_for_size (leni, 1).exists (&mode)
+ && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
+ {
+ location_t loc = gimple_location (stmt);
+ tree type, off;
+ type = build_nonstandard_integer_type (leni, 1);
+ gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
+ tree ptrtype = build_pointer_type_for_mode (char_type_node,
+ ptr_mode, true);
+ off = build_int_cst (ptrtype, 0);
+ arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
+ arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
+ tree tem1 = fold_const_aggregate_ref (arg1);
+ if (tem1)
+ arg1 = tem1;
+ tree tem2 = fold_const_aggregate_ref (arg2);
+ if (tem2)
+ arg2 = tem2;
+ res = fold_convert_loc (loc, TREE_TYPE (res),
+ fold_build2_loc (loc, NE_EXPR,
+ boolean_type_node,
+ arg1, arg2));
+ gimplify_and_update_call_from_tree (gsi_p, res);
+ return true;
+ }
+ }
+ return false;
+}
+
/* *GSI_P is a GIMPLE_CALL to a builtin function.
Optimize
memcpy (p, "abcd", 4);
@@ -1606,6 +1674,9 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2, bool full_walk
switch (DECL_FUNCTION_CODE (callee2))
{
+ case BUILT_IN_MEMCMP:
+ case BUILT_IN_MEMCMP_EQ:
+ return simplify_builtin_memcmp (gsi_p, as_a<gcall*>(stmt2));
case BUILT_IN_MEMCHR:
if (gimple_call_num_args (stmt2) == 3
&& (res = gimple_call_lhs (stmt2)) != nullptr