author    Richard Biener <rguenther@suse.de>  2020-06-25 11:21:20 +0200
committer Richard Biener <rguenther@suse.de>  2020-06-25 12:29:52 +0200
commit    86ce59b4f05d8f68ec4d9a14a7732acb370412db (patch)
tree      eb7c81a823bc2aaef6319c4cf91d561847bab2d7 /gcc
parent    88891c5ff0e3e20d3dd743c4eb6cc45399ee5c33 (diff)
tree-optimization/95866 - avoid using scalar ops for vectorized shift
This avoids using the original scalar SSA operand when vectorizing a
shift with a vectorized shift operand where we know all vector
components have the same value and thus we can use a vector-by-scalar
shift.  Using the scalar SSA operand causes a possibly long chain of
scalar computation to be retained, so it is better to simply extract
lane zero from the available vectorized shift operand.

2020-06-25  Richard Biener  <rguenther@suse.de>

        PR tree-optimization/95866
        * tree-vect-stmts.c (vectorizable_shift): Reject incompatible
        vectorized shift operands.  For scalar shifts use lane zero
        of a vectorized shift operand.

        * gcc.dg/vect/bb-slp-pr95866.c: New testcase.
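As a rough illustration (a minimal sketch using GCC's generic vector
extensions; the function name and types are made up for this note and
are not part of the patch): when every lane of the vectorized shift
operand is known to hold the same value, lane zero can serve as the
scalar shift amount, so the original scalar computation need not stay
live.

    typedef int v4si __attribute__ ((vector_size (16)));

    v4si
    shift_all_lanes (v4si x, v4si amt)
    {
      /* All lanes of 'amt' are equal; taking lane zero mirrors the
         BIT_FIELD_REF the patch emits.  */
      int s = amt[0];
      /* Vector-by-scalar shift.  */
      return x << s;
    }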
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c  17
-rw-r--r--  gcc/tree-vect-stmts.c                        30
2 files changed, 44 insertions, 3 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c
new file mode 100644
index 0000000..5de4671
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95866.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+int x[4];
+int j[4];
+void foo()
+{
+ x[0] = (x[0] << j[0]) + j[0];
+ x[1] = (x[1] << j[0]) + j[1];
+ x[2] = (x[2] << j[0]) + j[2];
+ x[3] = (x[3] << j[0]) + j[3];
+}
+
+/* The scalar shift argument should be extracted from the available vector. */
+/* { dg-final { scan-tree-dump "BIT_FIELD_REF" "slp2" } } */
+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index de7d77f..edd2853 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5413,6 +5413,15 @@ vectorizable_shift (vec_info *vinfo,
= (!op1_vectype
|| !tree_nop_conversion_p (TREE_TYPE (vectype),
TREE_TYPE (op1)));
+ if (incompatible_op1_vectype_p
+ && dt[1] == vect_internal_def)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unusable type for last operand in"
+ " vector/vector shift/rotate.\n");
+ return false;
+ }
}
}
}
@@ -5457,7 +5466,7 @@ vectorizable_shift (vec_info *vinfo,
{
if (slp_node
&& (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
- || (!scalar_shift_arg
+ || ((!scalar_shift_arg || dt[1] == vect_internal_def)
&& (!incompatible_op1_vectype_p
|| dt[1] == vect_constant_def)
&& !vect_maybe_update_slp_op_vectype
@@ -5499,6 +5508,7 @@ vectorizable_shift (vec_info *vinfo,
if (incompatible_op1_vectype_p && !slp_node)
{
+ gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
op1 = fold_convert (TREE_TYPE (vectype), op1);
if (dt[1] != vect_constant_def)
op1 = vect_init_vector (vinfo, stmt_info, op1,
@@ -5508,7 +5518,7 @@ vectorizable_shift (vec_info *vinfo,
/* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
- if (scalar_shift_arg)
+ if (scalar_shift_arg && dt[1] != vect_internal_def)
{
/* Vector shl and shr insn patterns can be defined with scalar
operand 2 (shift operand). In this case, use constant or loop
@@ -5533,7 +5543,7 @@ vectorizable_shift (vec_info *vinfo,
vec_oprnds1.quick_push (vec_oprnd1);
}
}
- else if (slp_node && incompatible_op1_vectype_p)
+ else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
{
if (was_scalar_shift_arg)
{
@@ -5566,6 +5576,20 @@ vectorizable_shift (vec_info *vinfo,
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
{
vop1 = vec_oprnds1[i];
+ /* For internal defs where we need to use a scalar shift arg
+ extract the first lane. */
+ if (scalar_shift_arg && dt[1] == vect_internal_def)
+ {
+ new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
+ gassign *new_stmt
+ = gimple_build_assign (new_temp,
+ build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
+ vop1,
+ TYPE_SIZE (TREE_TYPE (new_temp)),
+ bitsize_zero_node));
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ vop1 = new_temp;
+ }
gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
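For a quick sanity check that the two shift forms agree, here is a
standalone program using GCC's generic vector extensions (illustrative
only, not part of the testsuite):

    #include <stdio.h>

    typedef int v4si __attribute__ ((vector_size (16)));

    int
    main (void)
    {
      v4si x = { 1, 2, 3, 4 };
      v4si j = { 3, 3, 3, 3 };     /* all lanes equal, the case the patch targets */
      v4si by_scalar = x << j[0];  /* lane zero used as scalar shift amount */
      v4si by_vector = x << j;     /* per-lane vector shift */
      for (int i = 0; i < 4; i++)
        if (by_scalar[i] != by_vector[i])
          __builtin_abort ();
      printf ("vector-by-scalar and vector-by-vector shifts agree\n");
      return 0;
    }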