diff options
author | Richard Biener <rguenther@suse.de> | 2019-12-03 11:59:13 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2019-12-03 11:59:13 +0000 |
commit | 5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c (patch) | |
tree | 5c84bcbb4ae4e3a1b2b326b0cbf5f37160d5b143 /gcc | |
parent | 8f316505da1348cca2eb7aeac7919710c255d396 (diff) | |
download | gcc-5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c.zip gcc-5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c.tar.gz gcc-5105b576dfdcae0c6cc30a89a68b4ef3cbf56a1c.tar.bz2 |
re PR tree-optimization/92645 (Hand written vector code is 450 times slower when compiled with GCC compared to Clang)
2019-12-03 Richard Biener <rguenther@suse.de>
PR tree-optimization/92645
* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
from or to a properly aligned register variable.
* gcc.target/i386/pr92645-5.c: New testcase.
From-SVN: r278934
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 |
-rw-r--r-- | gcc/gimple-fold.c | 41 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 |
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92645-5.c | 21 |
4 files changed, 51 insertions, 22 deletions
```diff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9917ec2..e9119f9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-12-03  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/92645
+	* gimple-fold.c (gimple_fold_builtin_memory_op): Fold memcpy
+	from or to a properly aligned register variable.
+
 2019-12-03  Matthias Klose  <doko@ubuntu.com>
 
 	* Makefile.in (SOURCES): Add doc/lto-dump.1.
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 849bee2..24a478a 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -986,36 +986,33 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
 
           src_align = get_pointer_alignment (src);
           dest_align = get_pointer_alignment (dest);
-          if (dest_align < TYPE_ALIGN (desttype)
-              || src_align < TYPE_ALIGN (srctype))
-            return false;
 
+          /* Choose between src and destination type for the access based
+             on alignment, whether the access constitutes a register access
+             and whether it may actually expose a declaration for SSA rewrite
+             or SRA decomposition.  */
           destvar = NULL_TREE;
+          srcvar = NULL_TREE;
           if (TREE_CODE (dest) == ADDR_EXPR
               && var_decl_component_p (TREE_OPERAND (dest, 0))
-              && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len))
+              && tree_int_cst_equal (TYPE_SIZE_UNIT (desttype), len)
+              && dest_align >= TYPE_ALIGN (desttype)
+              && (is_gimple_reg_type (desttype)
+                  || src_align >= TYPE_ALIGN (desttype)))
             destvar = fold_build2 (MEM_REF, desttype, dest, off0);
-
-          srcvar = NULL_TREE;
-          if (TREE_CODE (src) == ADDR_EXPR
-              && var_decl_component_p (TREE_OPERAND (src, 0))
-              && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len))
-            {
-              if (!destvar
-                  || src_align >= TYPE_ALIGN (desttype))
-                srcvar = fold_build2 (MEM_REF, destvar ? desttype : srctype,
-                                      src, off0);
-              else if (!STRICT_ALIGNMENT)
-                {
-                  srctype = build_aligned_type (TYPE_MAIN_VARIANT (desttype),
-                                                src_align);
-                  srcvar = fold_build2 (MEM_REF, srctype, src, off0);
-                }
-            }
-
+          else if (TREE_CODE (src) == ADDR_EXPR
+                   && var_decl_component_p (TREE_OPERAND (src, 0))
+                   && tree_int_cst_equal (TYPE_SIZE_UNIT (srctype), len)
+                   && src_align >= TYPE_ALIGN (srctype)
+                   && (is_gimple_reg_type (srctype)
+                       || dest_align >= TYPE_ALIGN (srctype)))
+            srcvar = fold_build2 (MEM_REF, srctype, src, off0);
           if (srcvar == NULL_TREE && destvar == NULL_TREE)
             return false;
 
+          /* Now that we chose an access type express the other side in
+             terms of it if the target allows that with respect to alignment
+             constraints.  */
           if (srcvar == NULL_TREE)
             {
               if (src_align >= TYPE_ALIGN (desttype))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e7e0788..158e090 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
 2019-12-03  Richard Biener  <rguenther@suse.de>
 
+	PR tree-optimization/92645
+	* gcc.target/i386/pr92645-5.c: New testcase.
+
+2019-12-03  Richard Biener  <rguenther@suse.de>
+
 	PR tree-optimization/92751
 	* g++.dg/tree-ssa/pr92751.C: New testcase.
 
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-5.c b/gcc/testsuite/gcc.target/i386/pr92645-5.c
new file mode 100644
index 0000000..af3a4e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92645-5.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-cddce1 -mavx2 -Wno-psabi" } */
+typedef long long v4di __attribute__((vector_size(32)));
+struct Vec
+{
+  unsigned int v[8];
+};
+
+v4di pun (struct Vec *s)
+{
+  v4di tem;
+  __builtin_memcpy (&tem, s, 32);
+  return tem;
+}
+
+/* We're expecting exactly two stmts, in particular no BIT_INSERT_EXPR
+   and no memcpy call.
+    _3 = MEM <vector(4) long long int> [(char * {ref-all})s_2(D)];
+    return _3;  */
+/* { dg-final { scan-tree-dump-times " = MEM" 1 "cddce1" } } */
+/* { dg-final { scan-tree-dump-not "memcpy" "cddce1" } } */
```