about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2019-12-03 11:59:13 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2019-12-03 11:59:13 +0000
commit 5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c (patch)
tree 5c84bcbb4ae4e3a1b2b326b0cbf5f37160d5b143
parent 8f316505da1348cca2eb7aeac7919710c255d396 (diff)
download gcc-5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c.zip
gcc-5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c.tar.gz
gcc-5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c.tar.bz2
re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)
2019-12-03  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92645
	* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
	from or to a properly aligned register variable.

	* gcc.target/i386/pr92645-5.c: New testcase.

From-SVN: r278934
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/gimple-fold.c 41
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr92645-5.c 21
4 files changed, 51 insertions, 22 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9917ec2..e9119f9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-12-03 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/92645
+ * gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
+ from or to a properly aligned register variable.
+
2019-12-03 Matthias Klose <doko@ubuntu.com>
* Makefile.in (SOURCES): Add doc/lto-dump.1.
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 849bee2..24a478a 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -986,36 +986,33 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
src_align = get_pointer_alignment (src);
dest_align = get_pointer_alignment (dest);
- if (dest_align < TYPE_ALIGN (desttype)
- || src_align < TYPE_ALIGN (srctype))
- return false;
+ /* Choose between src and destination type for the access based
+ on alignment, whether the access constitutes a register access
+ and whether it may actually expose a declaration for SSA rewrite
+ or SRA decomposition. */
destvar = NULL_TREE;
+ srcvar = NULL_TREE;
if (TREE_CODE (dest) == ADDR_EXPR
&& var_decl_component_p (TREE_OPERAND (dest, 0))
- && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+ && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len)
+ && dest_align >= TYPE_ALIGN (desttype)
+ && (is_gimple_reg_type (desttype)
+ || src_align >= TYPE_ALIGN (desttype)))
destvar = fold_build2 (MEM_REF, desttype, dest, off0);
-
- srcvar = NULL_TREE;
- if (TREE_CODE (src) == ADDR_EXPR
- && var_decl_component_p (TREE_OPERAND (src, 0))
- && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
- {
- if (!destvar
- || src_align >= TYPE_ALIGN (desttype))
- srcvar = fold_build2 (MEM_REF, destvar ? desttype : srctype,
- src, off0);
- else if (!STRICT_ALIGNMENT)
- {
- srctype = build_aligned_type (TYPE_MAIN_VARIANT (desttype),
- src_align);
- srcvar = fold_build2 (MEM_REF, srctype, src, off0);
- }
- }
-
+ else if (TREE_CODE (src) == ADDR_EXPR
+ && var_decl_component_p (TREE_OPERAND (src, 0))
+ && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
+ && src_align >= TYPE_ALIGN (srctype)
+ && (is_gimple_reg_type (srctype)
+ || dest_align >= TYPE_ALIGN (srctype)))
+ srcvar = fold_build2 (MEM_REF, srctype, src, off0);
if (srcvar == NULL_TREE && destvar == NULL_TREE)
return false;
+ /* Now that we chose an access type express the other side in
+ terms of it if the target allows that with respect to alignment
+ constraints. */
if (srcvar == NULL_TREE)
{
if (src_align >= TYPE_ALIGN (desttype))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e7e0788..158e090 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
2019-12-03 Richard Biener <rguenther@suse.de>
+ PR tree-optimization/92645
+ * gcc.target/i386/pr92645-5.c: New testcase.
+
+2019-12-03 Richard Biener <rguenther@suse.de>
+
PR tree-optimization/92751
* g++.dg/tree-ssa/pr92751.C: New testcase.
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-5.c b/gcc/testsuite/gcc.target/i386/pr92645-5.c
new file mode 100644
index 0000000..af3a4e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645-5.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-cddce1 -mavx2 -Wno-psabi" } */
+typedef long long v4di __attribute__((vector_size(32)));
+struct Vec
+{
+ unsigned int v[8];
+};
+
+v4di pun (struct Vec *s)
+{
+ v4di tem;
+ __builtin_memcpy (&tem, s, 32);
+ return tem;
+}
+
+/* We're expecting exactly two stmts, in particular no BIT_INSERT_EXPR
+ and no memcpy call.
+ _3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
+ return _3; */
+/* { dg-final { scan-tree-dump-times " = MEM" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-not "memcpy" "cddce1" } } */