aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2019-05-16 08:03:49 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2019-05-16 08:03:49 +0000
commit 1bf2a0b90f2457f6d9301535560eb5e05978261b (patch)
tree bce84fa1f77945c69ed17cb17728304fec2ac700
parent adfe6e4b2f8e7288710eac6bacd6bae8d6ea3c05 (diff)
download gcc-1bf2a0b90f2457f6d9301535560eb5e05978261b.zip
gcc-1bf2a0b90f2457f6d9301535560eb5e05978261b.tar.gz
gcc-1bf2a0b90f2457f6d9301535560eb5e05978261b.tar.bz2
re PR target/90424 (memcpy into vector builtin not optimized)
2019-05-16  Richard Biener  <rguenther@suse.de>

    PR tree-optimization/90424
    * tree-ssa.c (non_rewritable_lvalue_p): Handle inserts from
    aligned subvectors.
    (execute_update_addresses_taken): Likewise.
    * tree-cfg.c (verify_gimple_assign_ternary): Likewise.

    * g++.target/i386/pr90424-1.C: New testcase.
    * g++.target/i386/pr90424-2.C: Likewise.

From-SVN: r271279
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/g++.target/i386/pr90424-1.C 32
-rw-r--r-- gcc/testsuite/g++.target/i386/pr90424-2.C 31
-rw-r--r-- gcc/tree-cfg.c 11
-rw-r--r-- gcc/tree-ssa.c 49
6 files changed, 124 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b8c658d..1ddc8bf 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
2019-05-16 Richard Biener <rguenther@suse.de>
+ PR tree-optimization/90424
+ * tree-ssa.c (non_rewritable_lvalue_p): Handle inserts from
+ aligned subvectors.
+ (execute_update_addresses_taken): Likewise.
+ * tree-cfg.c (verify_gimple_assign_ternary): Likewise.
+
+2019-05-16 Richard Biener <rguenther@suse.de>
+
* gimple-pretty-print.c (dump_ternary_rhs): Dump BIT_INSERT_EXPR
as __BIT_INSERT with -gimple.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2ca1d27..5f01fdf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2019-05-16 Richard Biener <rguenther@suse.de>
+ PR tree-optimization/90424
+ * g++.target/i386/pr90424-1.C: New testcase.
+ * g++.target/i386/pr90424-2.C: Likewise.
+
+2019-05-16 Richard Biener <rguenther@suse.de>
+
* gcc.dg/gimplefe-40.c: Amend again.
2019-05-15 Cherry Zhang <cherryyz@google.com>
diff --git a/gcc/testsuite/g++.target/i386/pr90424-1.C b/gcc/testsuite/g++.target/i386/pr90424-1.C
new file mode 100644
index 0000000..9df8c08
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr90424-1.C
@@ -0,0 +1,32 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
+
+template <class T>
+using V [[gnu::vector_size(16)]] = T;
+
+template <class T, unsigned M = sizeof(V<T>)>
+V<T> load(const void *p) {
+ using W = V<T>;
+ W r;
+ __builtin_memcpy(&r, p, M);
+ return r;
+}
+
+// movq or movsd
+template V<char> load<char, 8>(const void *); // bad
+template V<short> load<short, 8>(const void *); // bad
+template V<int> load<int, 8>(const void *); // bad
+template V<long> load<long, 8>(const void *); // good
+// the following is disabled because V2SF isn't a supported mode
+// template V<float> load<float, 8>(const void *); // bad
+template V<double> load<double, 8>(const void *); // good (movsd?)
+
+// movd or movss
+template V<char> load<char, 4>(const void *); // bad
+template V<short> load<short, 4>(const void *); // bad
+template V<int> load<int, 4>(const void *); // good
+template V<float> load<float, 4>(const void *); // good
+
+/* We should end up with one load and one insert for each function. */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr90424-2.C b/gcc/testsuite/g++.target/i386/pr90424-2.C
new file mode 100644
index 0000000..3abb65f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr90424-2.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
+
+template <class T>
+using V [[gnu::vector_size(16)]] = T;
+
+template <class T, unsigned M = sizeof(V<T>)>
+V<T> load(const void *p) {
+ V<T> r = {};
+ __builtin_memcpy(&r, p, M);
+ return r;
+}
+
+// movq or movsd
+template V<char> load<char, 8>(const void *); // bad
+template V<short> load<short, 8>(const void *); // bad
+template V<int> load<int, 8>(const void *); // bad
+template V<long> load<long, 8>(const void *); // good
+// the following is disabled because V2SF isn't a supported mode
+// template V<float> load<float, 8>(const void *); // bad
+template V<double> load<double, 8>(const void *); // good (movsd?)
+
+// movd or movss
+template V<char> load<char, 4>(const void *); // bad
+template V<short> load<short, 4>(const void *); // bad
+template V<int> load<int, 4>(const void *); // good
+template V<float> load<float, 4>(const void *); // good
+
+/* We should end up with one load and one insert for each function. */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 966ce5a..c6a70c8 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4263,8 +4263,17 @@ verify_gimple_assign_ternary (gassign *stmt)
}
if (! ((INTEGRAL_TYPE_P (rhs1_type)
&& INTEGRAL_TYPE_P (rhs2_type))
+ /* Vector element insert. */
|| (VECTOR_TYPE_P (rhs1_type)
- && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))))
+ && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))
+ /* Aligned sub-vector insert. */
+ || (VECTOR_TYPE_P (rhs1_type)
+ && VECTOR_TYPE_P (rhs2_type)
+ && types_compatible_p (TREE_TYPE (rhs1_type),
+ TREE_TYPE (rhs2_type))
+ && multiple_p (TYPE_VECTOR_SUBPARTS (rhs1_type),
+ TYPE_VECTOR_SUBPARTS (rhs2_type))
+ && multiple_of_p (bitsizetype, rhs3, TYPE_SIZE (rhs2_type)))))
{
error ("not allowed type combination in BIT_INSERT_EXPR");
debug_generic_expr (rhs1_type);
diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
index 489f6dc..8e3aec1 100644
--- a/gcc/tree-ssa.c
+++ b/gcc/tree-ssa.c
@@ -1521,14 +1521,29 @@ non_rewritable_lvalue_p (tree lhs)
if (DECL_P (decl)
&& VECTOR_TYPE_P (TREE_TYPE (decl))
&& TYPE_MODE (TREE_TYPE (decl)) != BLKmode
- && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
- TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))), 0)
&& known_ge (mem_ref_offset (lhs), 0)
&& known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
mem_ref_offset (lhs))
&& multiple_of_p (sizetype, TREE_OPERAND (lhs, 1),
TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
- return false;
+ {
+ poly_uint64 lhs_bits, nelts;
+ if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)), &lhs_bits)
+ && multiple_p (lhs_bits,
+ tree_to_uhwi
+ (TYPE_SIZE (TREE_TYPE (TREE_TYPE (decl)))),
+ &nelts))
+ {
+ if (known_eq (nelts, 1u))
+ return false;
+ /* For sub-vector inserts the insert vector mode has to be
+ supported. */
+ tree vtype = build_vector_type (TREE_TYPE (TREE_TYPE (decl)),
+ nelts);
+ if (TYPE_MODE (vtype) != BLKmode)
+ return false;
+ }
+ }
}
/* A vector-insert using a BIT_FIELD_REF is rewritable using
@@ -1866,20 +1881,30 @@ execute_update_addresses_taken (void)
&& bitmap_bit_p (suitable_for_renaming, DECL_UID (sym))
&& VECTOR_TYPE_P (TREE_TYPE (sym))
&& TYPE_MODE (TREE_TYPE (sym)) != BLKmode
- && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
- TYPE_SIZE_UNIT
- (TREE_TYPE (TREE_TYPE (sym))), 0)
- && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1))
- && tree_int_cst_lt (TREE_OPERAND (lhs, 1),
- TYPE_SIZE_UNIT (TREE_TYPE (sym)))
- && (tree_to_uhwi (TREE_OPERAND (lhs, 1))
- % tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0)
+ && known_ge (mem_ref_offset (lhs), 0)
+ && known_gt (wi::to_poly_offset
+ (TYPE_SIZE_UNIT (TREE_TYPE (sym))),
+ mem_ref_offset (lhs))
+ && multiple_of_p (sizetype,
+ TREE_OPERAND (lhs, 1),
+ TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
{
tree val = gimple_assign_rhs1 (stmt);
if (! types_compatible_p (TREE_TYPE (val),
TREE_TYPE (TREE_TYPE (sym))))
{
- tree tem = make_ssa_name (TREE_TYPE (TREE_TYPE (sym)));
+ poly_uint64 lhs_bits, nelts;
+ tree temtype = TREE_TYPE (TREE_TYPE (sym));
+ if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)),
+ &lhs_bits)
+ && multiple_p (lhs_bits,
+ tree_to_uhwi
+ (TYPE_SIZE (TREE_TYPE
+ (TREE_TYPE (sym)))),
+ &nelts)
+ && maybe_ne (nelts, 1u))
+ temtype = build_vector_type (temtype, nelts);
+ tree tem = make_ssa_name (temtype);
gimple *pun
= gimple_build_assign (tem,
build1 (VIEW_CONVERT_EXPR,