aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2017-12-16 14:03:30 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2017-12-16 14:03:30 +0000
commitbe4c1d4a42c5c7dc8bffbc5c9e3250f02be0d922 (patch)
tree5436123c8b1b4b28382ac09e07402b9762dda344 /gcc
parent02308bd3ec458762af1109d0ca6d2be757d555a0 (diff)
downloadgcc-be4c1d4a42c5c7dc8bffbc5c9e3250f02be0d922.zip
gcc-be4c1d4a42c5c7dc8bffbc5c9e3250f02be0d922.tar.gz
gcc-be4c1d4a42c5c7dc8bffbc5c9e3250f02be0d922.tar.bz2
Add VEC_DUPLICATE_EXPR and associated optab
SVE needs a way of broadcasting a scalar to a variable-length vector. This patch adds VEC_DUPLICATE_EXPR for when CONSTRUCTOR would be used for fixed-length vectors; this is the tree equivalent of the existing rtl code VEC_DUPLICATE. The patch also adds a vec_duplicate_optab to go with VEC_DUPLICATE_EXPR. 2017-12-16 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * doc/generic.texi (VEC_DUPLICATE_EXPR): Document. (VEC_COND_EXPR): Add missing @tindex. * doc/md.texi (vec_duplicate@var{m}): Document. * tree.def (VEC_DUPLICATE_EXPR): New tree code. * tree.c (build_vector_from_val): Add stubbed-out handling of variable-length vectors, using VEC_DUPLICATE_EXPR. (uniform_vector_p): Handle VEC_DUPLICATE_EXPR. * cfgexpand.c (expand_debug_expr): Likewise. * tree-cfg.c (verify_gimple_assign_unary): Likewise. * tree-inline.c (estimate_operator_cost): Likewise. * tree-pretty-print.c (dump_generic_node): Likewise. * tree-vect-generic.c (ssa_uniform_vector_p): Likewise. * fold-const.c (const_unop): Fold VEC_DUPLICATE_EXPRs of a constant. (test_vec_duplicate_folding): New function. (fold_const_c_tests): Call it. * optabs.def (vec_duplicate_optab): New optab. * optabs-tree.c (optab_for_tree_code): Handle VEC_DUPLICATE_EXPR. * optabs.h (expand_vector_broadcast): Declare. * optabs.c (expand_vector_broadcast): Make non-static. Try using vec_duplicate_optab. * expr.c (store_constructor): Try using vec_duplicate_optab for uniform vectors. (expand_expr_real_2): Handle VEC_DUPLICATE_EXPR. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r255740
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog28
-rw-r--r--gcc/cfgexpand.c1
-rw-r--r--gcc/doc/generic.texi6
-rw-r--r--gcc/doc/md.texi11
-rw-r--r--gcc/expr.c39
-rw-r--r--gcc/fold-const.c22
-rw-r--r--gcc/optabs-tree.c3
-rw-r--r--gcc/optabs.c12
-rw-r--r--gcc/optabs.def2
-rw-r--r--gcc/optabs.h1
-rw-r--r--gcc/tree-cfg.c11
-rw-r--r--gcc/tree-inline.c1
-rw-r--r--gcc/tree-pretty-print.c9
-rw-r--r--gcc/tree-vect-generic.c1
-rw-r--r--gcc/tree.c7
-rw-r--r--gcc/tree.def3
16 files changed, 147 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c2d037a..b3cbc1c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@
+2017-12-16 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * doc/generic.texi (VEC_DUPLICATE_EXPR): Document.
+ (VEC_COND_EXPR): Add missing @tindex.
+ * doc/md.texi (vec_duplicate@var{m}): Document.
+ * tree.def (VEC_DUPLICATE_EXPR): New tree code.
+ * tree.c (build_vector_from_val): Add stubbed-out handling of
+ variable-length vectors, using VEC_DUPLICATE_EXPR.
+ (uniform_vector_p): Handle VEC_DUPLICATE_EXPR.
+ * cfgexpand.c (expand_debug_expr): Likewise.
+ * tree-cfg.c (verify_gimple_assign_unary): Likewise.
+ * tree-inline.c (estimate_operator_cost): Likewise.
+ * tree-pretty-print.c (dump_generic_node): Likewise.
+ * tree-vect-generic.c (ssa_uniform_vector_p): Likewise.
+ * fold-const.c (const_unop): Fold VEC_DUPLICATE_EXPRs of a constant.
+ (test_vec_duplicate_folding): New function.
+ (fold_const_c_tests): Call it.
+ * optabs.def (vec_duplicate_optab): New optab.
+ * optabs-tree.c (optab_for_tree_code): Handle VEC_DUPLICATE_EXPR.
+ * optabs.h (expand_vector_broadcast): Declare.
+ * optabs.c (expand_vector_broadcast): Make non-static. Try using
+ vec_duplicate_optab.
+ * expr.c (store_constructor): Try using vec_duplicate_optab for
+ uniform vectors.
+ (expand_expr_real_2): Handle VEC_DUPLICATE_EXPR.
+
2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
PR target/83358
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index ce98264..bde2119 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -5069,6 +5069,7 @@ expand_debug_expr (tree exp)
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
case VEC_PERM_EXPR:
+ case VEC_DUPLICATE_EXPR:
return NULL;
/* Misc codes. */
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index b01cdaa..640eb3b 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1768,6 +1768,7 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}.
@node Vectors
@subsection Vectors
+@tindex VEC_DUPLICATE_EXPR
@tindex VEC_LSHIFT_EXPR
@tindex VEC_RSHIFT_EXPR
@tindex VEC_WIDEN_MULT_HI_EXPR
@@ -1779,9 +1780,14 @@ a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}.
@tindex VEC_PACK_TRUNC_EXPR
@tindex VEC_PACK_SAT_EXPR
@tindex VEC_PACK_FIX_TRUNC_EXPR
+@tindex VEC_COND_EXPR
@tindex SAD_EXPR
@table @code
+@item VEC_DUPLICATE_EXPR
+This node has a single operand and represents a vector in which every
+element is equal to that operand.
+
@item VEC_LSHIFT_EXPR
@itemx VEC_RSHIFT_EXPR
These nodes represent whole vector left and right shifts, respectively.
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 9e0540a..f9d997a 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4888,6 +4888,17 @@ and operand 1 is parallel containing values for individual fields. The
the vector mode @var{m}, or a vector mode with the same element mode and
smaller number of elements.
+@cindex @code{vec_duplicate@var{m}} instruction pattern
+@item @samp{vec_duplicate@var{m}}
+Initialize vector output operand 0 so that each element has the value given
+by scalar input operand 1. The vector has mode @var{m} and the scalar has
+the mode appropriate for one element of @var{m}.
+
+This pattern only handles duplicates of non-constant inputs. Constant
+vectors go through the @code{mov@var{m}} pattern instead.
+
+This pattern is not allowed to @code{FAIL}.
+
@cindex @code{vec_cmp@var{m}@var{n}} instruction pattern
@item @samp{vec_cmp@var{m}@var{n}}
Output a vector comparison. Operand 0 of mode @var{n} is the destination for
diff --git a/gcc/expr.c b/gcc/expr.c
index 8011638..5f7c7e4 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -6598,7 +6598,8 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size,
constructor_elt *ce;
int i;
int need_to_clear;
- int icode = CODE_FOR_nothing;
+ insn_code icode = CODE_FOR_nothing;
+ tree elt;
tree elttype = TREE_TYPE (type);
int elt_size = tree_to_uhwi (TYPE_SIZE (elttype));
machine_mode eltmode = TYPE_MODE (elttype);
@@ -6608,13 +6609,30 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size,
unsigned n_elts;
alias_set_type alias;
bool vec_vec_init_p = false;
+ machine_mode mode = GET_MODE (target);
gcc_assert (eltmode != BLKmode);
+ /* Try using vec_duplicate_optab for uniform vectors. */
+ if (!TREE_SIDE_EFFECTS (exp)
+ && VECTOR_MODE_P (mode)
+ && eltmode == GET_MODE_INNER (mode)
+ && ((icode = optab_handler (vec_duplicate_optab, mode))
+ != CODE_FOR_nothing)
+ && (elt = uniform_vector_p (exp)))
+ {
+ struct expand_operand ops[2];
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], expand_normal (elt), eltmode);
+ expand_insn (icode, 2, ops);
+ if (!rtx_equal_p (target, ops[0].value))
+ emit_move_insn (target, ops[0].value);
+ break;
+ }
+
n_elts = TYPE_VECTOR_SUBPARTS (type);
- if (REG_P (target) && VECTOR_MODE_P (GET_MODE (target)))
+ if (REG_P (target) && VECTOR_MODE_P (mode))
{
- machine_mode mode = GET_MODE (target);
machine_mode emode = eltmode;
if (CONSTRUCTOR_NELTS (exp)
@@ -6626,7 +6644,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size,
== n_elts);
emode = TYPE_MODE (etype);
}
- icode = (int) convert_optab_handler (vec_init_optab, mode, emode);
+ icode = convert_optab_handler (vec_init_optab, mode, emode);
if (icode != CODE_FOR_nothing)
{
unsigned int i, n = n_elts;
@@ -6674,7 +6692,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size,
if (need_to_clear && size > 0 && !vector)
{
if (REG_P (target))
- emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
+ emit_move_insn (target, CONST0_RTX (mode));
else
clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
cleared = 1;
@@ -6682,7 +6700,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size,
/* Inform later passes that the old value is dead. */
if (!cleared && !vector && REG_P (target))
- emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
+ emit_move_insn (target, CONST0_RTX (mode));
if (MEM_P (target))
alias = MEM_ALIAS_SET (target);
@@ -6733,8 +6751,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size,
if (vector)
emit_insn (GEN_FCN (icode) (target,
- gen_rtx_PARALLEL (GET_MODE (target),
- vector)));
+ gen_rtx_PARALLEL (mode, vector)));
break;
}
@@ -9567,6 +9584,12 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
return target;
+ case VEC_DUPLICATE_EXPR:
+ op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
+ target = expand_vector_broadcast (mode, op0);
+ gcc_assert (target);
+ return target;
+
case BIT_INSERT_EXPR:
{
unsigned bitpos = tree_to_uhwi (treeop2);
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 9fc69e8..6ce9ea1 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -1770,6 +1770,11 @@ const_unop (enum tree_code code, tree type, tree arg0)
return elts.build ();
}
+ case VEC_DUPLICATE_EXPR:
+ if (CONSTANT_CLASS_P (arg0))
+ return build_vector_from_val (type, arg0);
+ return NULL_TREE;
+
default:
break;
}
@@ -14477,6 +14482,22 @@ test_vector_folding ()
ASSERT_FALSE (integer_nonzerop (fold_build2 (NE_EXPR, res_type, one, one)));
}
+/* Verify folding of VEC_DUPLICATE_EXPRs. */
+
+static void
+test_vec_duplicate_folding ()
+{
+ scalar_int_mode int_mode = SCALAR_INT_TYPE_MODE (ssizetype);
+ machine_mode vec_mode = targetm.vectorize.preferred_simd_mode (int_mode);
+ /* This will be 1 if VEC_MODE isn't a vector mode. */
+ unsigned int nunits = GET_MODE_NUNITS (vec_mode);
+
+ tree type = build_vector_type (ssizetype, nunits);
+ tree dup5_expr = fold_unary (VEC_DUPLICATE_EXPR, type, ssize_int (5));
+ tree dup5_cst = build_vector_from_val (type, ssize_int (5));
+ ASSERT_TRUE (operand_equal_p (dup5_expr, dup5_cst, 0));
+}
+
/* Run all of the selftests within this file. */
void
@@ -14484,6 +14505,7 @@ fold_const_c_tests ()
{
test_arithmetic_folding ();
test_vector_folding ();
+ test_vec_duplicate_folding ();
}
} // namespace selftest
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index a510c16..e0eb20c 100644
--- a/gcc/optabs-tree.c
+++ b/gcc/optabs-tree.c
@@ -199,6 +199,9 @@ optab_for_tree_code (enum tree_code code, const_tree type,
return TYPE_UNSIGNED (type) ?
vec_pack_ufix_trunc_optab : vec_pack_sfix_trunc_optab;
+ case VEC_DUPLICATE_EXPR:
+ return vec_duplicate_optab;
+
default:
break;
}
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 518ce7a..30fe996 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -367,7 +367,7 @@ force_expand_binop (machine_mode mode, optab binoptab,
mode of OP must be the element mode of VMODE. If OP is a constant,
then the return value will be a constant. */
-static rtx
+rtx
expand_vector_broadcast (machine_mode vmode, rtx op)
{
enum insn_code icode;
@@ -380,6 +380,16 @@ expand_vector_broadcast (machine_mode vmode, rtx op)
if (valid_for_const_vec_duplicate_p (vmode, op))
return gen_const_vec_duplicate (vmode, op);
+ icode = optab_handler (vec_duplicate_optab, vmode);
+ if (icode != CODE_FOR_nothing)
+ {
+ struct expand_operand ops[2];
+ create_output_operand (&ops[0], NULL_RTX, vmode);
+ create_input_operand (&ops[1], op, GET_MODE (op));
+ expand_insn (icode, 2, ops);
+ return ops[0].value;
+ }
+
/* ??? If the target doesn't have a vec_init, then we have no easy way
of performing this operation. Most of this sort of generic support
is hidden away in the vector lowering support in gimple. */
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 54afe2d..f3f4bc8 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -364,3 +364,5 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a")
OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
+
+OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 07d07fe..32f876a 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -181,6 +181,7 @@ extern rtx simplify_expand_binop (machine_mode mode, optab binoptab,
enum optab_methods methods);
extern bool force_expand_binop (machine_mode, optab, rtx, rtx, rtx, int,
enum optab_methods);
+extern rtx expand_vector_broadcast (machine_mode, rtx);
/* Generate code for a simple binary or unary operation. "Simple" in
this case means "can be unambiguously described by a (mode, code)
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 3b16c10..2b331d6 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3880,6 +3880,17 @@ verify_gimple_assign_unary (gassign *stmt)
case CONJ_EXPR:
break;
+ case VEC_DUPLICATE_EXPR:
+ if (TREE_CODE (lhs_type) != VECTOR_TYPE
+ || !useless_type_conversion_p (TREE_TYPE (lhs_type), rhs1_type))
+ {
+ error ("vec_duplicate should be from a scalar to a like vector");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ return true;
+ }
+ return false;
+
default:
gcc_unreachable ();
}
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 8604ba1..99546be 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3928,6 +3928,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
case VEC_PACK_FIX_TRUNC_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
+ case VEC_DUPLICATE_EXPR:
return 1;
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 6519f3e..31ed900 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -3178,6 +3178,15 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags,
pp_string (pp, " > ");
break;
+ case VEC_DUPLICATE_EXPR:
+ pp_space (pp);
+ for (str = get_tree_code_name (code); *str; str++)
+ pp_character (pp, TOUPPER (*str));
+ pp_string (pp, " < ");
+ dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (pp, " > ");
+ break;
+
case VEC_UNPACK_HI_EXPR:
pp_string (pp, " VEC_UNPACK_HI_EXPR < ");
dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index aa5542d..b214208 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -1419,6 +1419,7 @@ static tree
ssa_uniform_vector_p (tree op)
{
if (TREE_CODE (op) == VECTOR_CST
+ || TREE_CODE (op) == VEC_DUPLICATE_EXPR
|| TREE_CODE (op) == CONSTRUCTOR)
return uniform_vector_p (op);
if (TREE_CODE (op) == SSA_NAME)
diff --git a/gcc/tree.c b/gcc/tree.c
index ed1852b..8e0313c 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -1785,6 +1785,8 @@ build_vector_from_val (tree vectype, tree sc)
v.quick_push (sc);
return v.build ();
}
+ else if (0)
+ return fold_build1 (VEC_DUPLICATE_EXPR, vectype, sc);
else
{
vec<constructor_elt, va_gc> *v;
@@ -10468,7 +10470,10 @@ uniform_vector_p (const_tree vec)
gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec)));
- if (TREE_CODE (vec) == VECTOR_CST)
+ if (TREE_CODE (vec) == VEC_DUPLICATE_EXPR)
+ return TREE_OPERAND (vec, 0);
+
+ else if (TREE_CODE (vec) == VECTOR_CST)
{
if (VECTOR_CST_NPATTERNS (vec) == 1 && VECTOR_CST_DUPLICATE_P (vec))
return VECTOR_CST_ENCODED_ELT (vec, 0);
diff --git a/gcc/tree.def b/gcc/tree.def
index 137e63f..c3af824 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -537,6 +537,9 @@ DEFTREECODE (TARGET_EXPR, "target_expr", tcc_expression, 4)
1 and 2 are NULL. The operands are then taken from the cfg edges. */
DEFTREECODE (COND_EXPR, "cond_expr", tcc_expression, 3)
+/* Represents a vector in which every element is equal to operand 0. */
+DEFTREECODE (VEC_DUPLICATE_EXPR, "vec_duplicate_expr", tcc_unary, 1)
+
/* Vector conditional expression. It is like COND_EXPR, but with
vector operands.