aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author: Richard Sandiford <richard.sandiford@linaro.org> 2018-01-02 18:26:47 +0000
committer: Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-02 18:26:47 +0000
commit: e3342de49cbee48957acc749b9566eee230860be (patch)
tree: 32a86a752b83bafed11e1621d738a7fd284a93f7 /gcc/config
parent: 6da64f1b329f57c07f22ec034bc7bc4b0dc9e87b (diff)
downloadgcc-e3342de49cbee48957acc749b9566eee230860be.zip
gcc-e3342de49cbee48957acc749b9566eee230860be.tar.gz
gcc-e3342de49cbee48957acc749b9566eee230860be.tar.bz2
Make vec_perm_indices use new vector encoding
This patch changes vec_perm_indices from a plain vec<> to a class that stores a canonicalized permutation, using the same encoding as for VECTOR_CSTs. This means that vec_perm_indices now carries information about the number of vectors being permuted (currently always 1 or 2) and the number of elements in each input vector. A new vec_perm_builder class is used to actually build up the vector, like tree_vector_builder does for trees. vec_perm_indices is the completed representation, a bit like VECTOR_CST is for trees. The patch just does a mechanical conversion of the code to vec_perm_builder: a later patch uses explicit encodings where possible. The point of all this is that it makes the representation suitable for variable-length vectors. It's no longer necessary for the underlying vec<>s to store every element explicitly. In int-vector-builder.h, "using the same encoding as tree and rtx constants" describes the endpoint -- adding the rtx encoding comes later. 2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> gcc/ * int-vector-builder.h: New file. * vec-perm-indices.h: Include int-vector-builder.h. (vec_perm_indices): Redefine as an int_vector_builder. (auto_vec_perm_indices): Delete. (vec_perm_builder): Redefine as a stand-alone class. (vec_perm_indices::vec_perm_indices): New function. (vec_perm_indices::clamp): Likewise. * vec-perm-indices.c: Include fold-const.h and tree-vector-builder.h. (vec_perm_indices::new_vector): New function. (vec_perm_indices::new_expanded_vector): Update for new vec_perm_indices class. (vec_perm_indices::rotate_inputs): New function. (vec_perm_indices::all_in_range_p): Operate directly on the encoded form, without computing elided elements. (tree_to_vec_perm_builder): Operate directly on the VECTOR_CST encoding. Update for new vec_perm_indices class. * optabs.c (expand_vec_perm_const): Create a vec_perm_indices for the given vec_perm_builder. (expand_vec_perm_var): Update vec_perm_builder constructor. 
(expand_mult_highpart): Use vec_perm_builder instead of auto_vec_perm_indices. * optabs-query.c (can_mult_highpart_p): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices. Use a single or double series encoding as appropriate. * fold-const.c (fold_ternary_loc): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices. * tree-ssa-forwprop.c (simplify_vector_constructor): Likewise. * tree-vect-data-refs.c (vect_grouped_store_supported): Likewise. (vect_permute_store_chain): Likewise. (vect_grouped_load_supported): Likewise. (vect_permute_load_chain): Likewise. (vect_shift_permute_load_chain): Likewise. * tree-vect-slp.c (vect_build_slp_tree_1): Likewise. (vect_transform_slp_perm_load): Likewise. (vect_schedule_slp_instance): Likewise. * tree-vect-stmts.c (perm_mask_for_reverse): Likewise. (vectorizable_mask_load_store): Likewise. (vectorizable_bswap): Likewise. (vectorizable_store): Likewise. (vectorizable_load): Likewise. * tree-vect-generic.c (lower_vec_perm): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices. Use tree_to_vec_perm_builder to read the vector from a tree. * tree-vect-loop.c (calc_vec_perm_mask_for_shift): Take a vec_perm_builder instead of a vec_perm_indices. (have_whole_vector_shift): Use vec_perm_builder and vec_perm_indices instead of auto_vec_perm_indices. Leave the truncation to calc_vec_perm_mask_for_shift. (vect_create_epilog_for_reduction): Likewise. * config/aarch64/aarch64.c (expand_vec_perm_d::perm): Change from auto_vec_perm_indices to vec_perm_indices. (aarch64_expand_vec_perm_const_1): Use rotate_inputs on d.perm instead of changing individual elements. (aarch64_vectorize_vec_perm_const): Use new_vector to install the vector in d.perm. * config/arm/arm.c (expand_vec_perm_d::perm): Change from auto_vec_perm_indices to vec_perm_indices. (arm_expand_vec_perm_const_1): Use rotate_inputs on d.perm instead of changing individual elements. 
(arm_vectorize_vec_perm_const): Use new_vector to install the vector in d.perm. * config/powerpcspe/powerpcspe.c (rs6000_expand_extract_even): Update vec_perm_builder constructor. (rs6000_expand_interleave): Likewise. * config/rs6000/rs6000.c (rs6000_expand_extract_even): Likewise. (rs6000_expand_interleave): Likewise. From-SVN: r256095
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/aarch64/aarch64.c 13
-rw-r--r-- gcc/config/arm/arm.c 12
-rw-r--r-- gcc/config/powerpcspe/powerpcspe.c 4
-rw-r--r-- gcc/config/rs6000/rs6000.c 4
4 files changed, 12 insertions, 21 deletions
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 05b82bc..93e9d9f9 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13252,7 +13252,7 @@ aarch64_split_combinev16qi (rtx operands[3])
struct expand_vec_perm_d
{
rtx target, op0, op1;
- auto_vec_perm_indices perm;
+ vec_perm_indices perm;
machine_mode vmode;
bool one_vector_p;
bool testing_p;
@@ -13642,10 +13642,7 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
unsigned int nelt = d->perm.length ();
if (d->perm[0] >= nelt)
{
- gcc_assert (nelt == (nelt & -nelt));
- for (unsigned int i = 0; i < nelt; ++i)
- d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
-
+ d->perm.rotate_inputs (1);
std::swap (d->op0, d->op1);
}
@@ -13685,12 +13682,10 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
/* Calculate whether all elements are in one vector. */
unsigned int nelt = sel.length ();
- d.perm.reserve (nelt);
for (i = which = 0; i < nelt; ++i)
{
unsigned int ei = sel[i] & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
- d.perm.quick_push (ei);
}
switch (which)
@@ -13709,8 +13704,6 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
input vector. */
/* Fall Through. */
case 2:
- for (i = 0; i < nelt; ++i)
- d.perm[i] &= nelt - 1;
d.op0 = op1;
d.one_vector_p = true;
break;
@@ -13721,6 +13714,8 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
break;
}
+ d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
+
if (!d.testing_p)
return aarch64_expand_vec_perm_const_1 (&d);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 2173d95..11e35ad 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -28854,7 +28854,7 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
struct expand_vec_perm_d
{
rtx target, op0, op1;
- auto_vec_perm_indices perm;
+ vec_perm_indices perm;
machine_mode vmode;
bool one_vector_p;
bool testing_p;
@@ -29362,9 +29362,7 @@ arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
unsigned int nelt = d->perm.length ();
if (d->perm[0] >= nelt)
{
- for (unsigned int i = 0; i < nelt; ++i)
- d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
-
+ d->perm.rotate_inputs (1);
std::swap (d->op0, d->op1);
}
@@ -29404,12 +29402,10 @@ arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
d.testing_p = !target;
nelt = GET_MODE_NUNITS (d.vmode);
- d.perm.reserve (nelt);
for (i = which = 0; i < nelt; ++i)
{
int ei = sel[i] & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
- d.perm.quick_push (ei);
}
switch (which)
@@ -29428,8 +29424,6 @@ arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
input vector. */
/* FALLTHRU */
case 2:
- for (i = 0; i < nelt; ++i)
- d.perm[i] &= nelt - 1;
d.op0 = op1;
d.one_vector_p = true;
break;
@@ -29440,6 +29434,8 @@ arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
break;
}
+ d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
+
if (!d.testing_p)
return arm_expand_vec_perm_const_1 (&d);
diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c
index 9133125..520f2e1 100644
--- a/gcc/config/powerpcspe/powerpcspe.c
+++ b/gcc/config/powerpcspe/powerpcspe.c
@@ -38782,7 +38782,7 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
machine_mode vmode = GET_MODE (target);
unsigned i, nelt = GET_MODE_NUNITS (vmode);
- vec_perm_builder perm (nelt);
+ vec_perm_builder perm (nelt, nelt, 1);
for (i = 0; i < nelt; i++)
perm.quick_push (i * 2);
@@ -38797,7 +38797,7 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
machine_mode vmode = GET_MODE (target);
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
- vec_perm_builder perm (nelt);
+ vec_perm_builder perm (nelt, nelt, 1);
high = (highp ? 0 : nelt / 2);
for (i = 0; i < nelt / 2; i++)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 7d10b44..2e4af41 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -36042,7 +36042,7 @@ rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
{
machine_mode vmode = GET_MODE (target);
unsigned i, nelt = GET_MODE_NUNITS (vmode);
- vec_perm_builder perm (nelt);
+ vec_perm_builder perm (nelt, nelt, 1);
for (i = 0; i < nelt; i++)
perm.quick_push (i * 2);
@@ -36057,7 +36057,7 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
{
machine_mode vmode = GET_MODE (target);
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
- vec_perm_builder perm (nelt);
+ vec_perm_builder perm (nelt, nelt, 1);
high = (highp ? 0 : nelt / 2);
for (i = 0; i < nelt / 2; i++)