author     Richard Sandiford <richard.sandiford@linaro.org>  2018-01-02 18:26:16 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>          2018-01-02 18:26:16 +0000
commit     279b805713fd498afb7986698a2e3406bc947d87 (patch)
tree       93129a39bf65aef7a97f30ca8329f00826514d79
parent     7ac7e2868d450dfb9080166ddc4abcc21b86fab3 (diff)
Refactor expand_vec_perm
This patch splits the variable handling out of expand_vec_perm into a
subroutine, so that the next patch can use a different interface for
expanding constant permutes.  expand_vec_perm now does all the
CONST_VECTOR handling directly and defers to expand_vec_perm_var for
other rtx codes.  Handling CONST_VECTORs includes handling the fallback
to variable permutes.

The patch also adds an assert for valid optab modes to expand_vec_perm_1,
so that we get it when using optabs for CONST_VECTORs.  The
MODE_VECTOR_INT part was previously in expand_vec_perm and the
mode_for_int_vector part is new.

Most of the patch is just reindentation.

2018-01-02  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* optabs.c (expand_vec_perm_1): Assert that SEL has an integer
	vector mode and that that mode matches the mode of the data
	being permuted.
	(expand_vec_perm): Split handling of non-CONST_VECTOR selectors
	out into expand_vec_perm_var.  Do all CONST_VECTOR handling here,
	directly using expand_vec_perm_1 when forcing selectors into
	registers.
	(expand_vec_perm_var): New function, split out from expand_vec_perm.

From-SVN: r256092
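As a reading aid before the hunks, here is a condensed sketch of the
control flow that results from the split.  It only restates what the
diff below introduces; the signatures are copied from the patch, the
bodies are elided ("...") and summarized in comments, so this is not
code from the patch itself:

rtx
expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
{
  /* Variable selectors are now handled by the new subroutine.  */
  if (GET_CODE (sel) != CONST_VECTOR)
    return expand_vec_perm_var (mode, v0, v1, sel, target);

  /* Constant selector: try vec_shr (only when V1 is all zeros), then
     vec_perm_const in MODE, then a constant byte-based permutation in
     the equivalent QImode vector mode, and finally fall back to the
     fully variable vec_perm expansion, using expand_vec_perm_1 to
     force the selector into a register.  */
  ...
}

static rtx
expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
{
  /* Try the vec_perm optab in MODE, then lower the element-based
     selector to a byte-based one and retry with the vec_perm optab in
     the equivalent QImode vector mode.  */
  ...
}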
-rw-r--r--  gcc/ChangeLog |  11
-rw-r--r--  gcc/optabs.c  | 258
2 files changed, 163 insertions(+), 106 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 61fddf2..b82ea04 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
+ * optabs.c (expand_vec_perm_1): Assert that SEL has an integer
+ vector mode and that that mode matches the mode of the data
+ being permuted.
+ (expand_vec_perm): Split handling of non-CONST_VECTOR selectors
+ out into expand_vec_perm_var. Do all CONST_VECTOR handling here,
+ directly using expand_vec_perm_1 when forcing selectors into
+ registers.
+ (expand_vec_perm_var): New function, split out from expand_vec_perm.
+
+2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
+
* optabs-query.h (can_vec_perm_p): Delete.
(can_vec_perm_var_p, can_vec_perm_const_p): Declare.
* optabs-query.c (can_vec_perm_p): Split into...
diff --git a/gcc/optabs.c b/gcc/optabs.c
index a2213dd..3549b4a 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5425,6 +5425,8 @@ expand_vec_perm_1 (enum insn_code icode, rtx target,
machine_mode smode = GET_MODE (sel);
struct expand_operand ops[4];
+ gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT
+ || mode_for_int_vector (tmode).require () == smode);
create_output_operand (&ops[0], target, tmode);
create_input_operand (&ops[3], sel, smode);
@@ -5451,8 +5453,13 @@ expand_vec_perm_1 (enum insn_code icode, rtx target,
return NULL_RTX;
}
-/* Generate instructions for vec_perm optab given its mode
- and three operands. */
+static rtx expand_vec_perm_var (machine_mode, rtx, rtx, rtx, rtx);
+
+/* Implement a permutation of vectors v0 and v1 using the permutation
+ vector in SEL and return the result. Use TARGET to hold the result
+ if nonnull and convenient.
+
+ MODE is the mode of the vectors being permuted (V0 and V1). */
rtx
expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
@@ -5463,6 +5470,9 @@ expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
rtx tmp, sel_qi = NULL;
rtvec vec;
+ if (GET_CODE (sel) != CONST_VECTOR)
+ return expand_vec_perm_var (mode, v0, v1, sel, target);
+
if (!target || GET_MODE (target) != mode)
target = gen_reg_rtx (mode);
@@ -5475,86 +5485,125 @@ expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
if (!qimode_for_vec_perm (mode).exists (&qimode))
qimode = VOIDmode;
- /* If the input is a constant, expand it specially. */
- gcc_assert (GET_MODE_CLASS (GET_MODE (sel)) == MODE_VECTOR_INT);
- if (GET_CODE (sel) == CONST_VECTOR)
- {
- /* See if this can be handled with a vec_shr. We only do this if the
- second vector is all zeroes. */
- enum insn_code shift_code = optab_handler (vec_shr_optab, mode);
- enum insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode)
- ? optab_handler (vec_shr_optab, qimode)
- : CODE_FOR_nothing);
- rtx shift_amt = NULL_RTX;
- if (v1 == CONST0_RTX (GET_MODE (v1))
- && (shift_code != CODE_FOR_nothing
- || shift_code_qi != CODE_FOR_nothing))
+ /* See if this can be handled with a vec_shr. We only do this if the
+ second vector is all zeroes. */
+ insn_code shift_code = optab_handler (vec_shr_optab, mode);
+ insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode)
+ ? optab_handler (vec_shr_optab, qimode)
+ : CODE_FOR_nothing);
+
+ if (v1 == CONST0_RTX (GET_MODE (v1))
+ && (shift_code != CODE_FOR_nothing
+ || shift_code_qi != CODE_FOR_nothing))
+ {
+ rtx shift_amt = shift_amt_for_vec_perm_mask (sel);
+ if (shift_amt)
{
- shift_amt = shift_amt_for_vec_perm_mask (sel);
- if (shift_amt)
+ struct expand_operand ops[3];
+ if (shift_code != CODE_FOR_nothing)
{
- struct expand_operand ops[3];
- if (shift_code != CODE_FOR_nothing)
- {
- create_output_operand (&ops[0], target, mode);
- create_input_operand (&ops[1], v0, mode);
- create_convert_operand_from_type (&ops[2], shift_amt,
- sizetype);
- if (maybe_expand_insn (shift_code, 3, ops))
- return ops[0].value;
- }
- if (shift_code_qi != CODE_FOR_nothing)
- {
- tmp = gen_reg_rtx (qimode);
- create_output_operand (&ops[0], tmp, qimode);
- create_input_operand (&ops[1], gen_lowpart (qimode, v0),
- qimode);
- create_convert_operand_from_type (&ops[2], shift_amt,
- sizetype);
- if (maybe_expand_insn (shift_code_qi, 3, ops))
- return gen_lowpart (mode, ops[0].value);
- }
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], v0, mode);
+ create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
+ if (maybe_expand_insn (shift_code, 3, ops))
+ return ops[0].value;
+ }
+ if (shift_code_qi != CODE_FOR_nothing)
+ {
+ rtx tmp = gen_reg_rtx (qimode);
+ create_output_operand (&ops[0], tmp, qimode);
+ create_input_operand (&ops[1], gen_lowpart (qimode, v0), qimode);
+ create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
+ if (maybe_expand_insn (shift_code_qi, 3, ops))
+ return gen_lowpart (mode, ops[0].value);
}
}
+ }
- icode = direct_optab_handler (vec_perm_const_optab, mode);
- if (icode != CODE_FOR_nothing)
+ icode = direct_optab_handler (vec_perm_const_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
+ if (tmp)
+ return tmp;
+ }
+
+ /* Fall back to a constant byte-based permutation. */
+ if (qimode != VOIDmode)
+ {
+ vec = rtvec_alloc (w);
+ for (i = 0; i < e; ++i)
{
- tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
- if (tmp)
- return tmp;
+ unsigned int j, this_e;
+
+ this_e = INTVAL (CONST_VECTOR_ELT (sel, i));
+ this_e &= 2 * e - 1;
+ this_e *= u;
+
+ for (j = 0; j < u; ++j)
+ RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
}
+ sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
- /* Fall back to a constant byte-based permutation. */
- if (qimode != VOIDmode)
+ icode = direct_optab_handler (vec_perm_const_optab, qimode);
+ if (icode != CODE_FOR_nothing)
{
- vec = rtvec_alloc (w);
- for (i = 0; i < e; ++i)
- {
- unsigned int j, this_e;
+ tmp = gen_reg_rtx (qimode);
+ tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
+ gen_lowpart (qimode, v1), sel_qi);
+ if (tmp)
+ return gen_lowpart (mode, tmp);
+ }
+ }
- this_e = INTVAL (CONST_VECTOR_ELT (sel, i));
- this_e &= 2 * e - 1;
- this_e *= u;
+ /* Otherwise expand as a fully variable permuation. */
- for (j = 0; j < u; ++j)
- RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
- }
- sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
+ icode = direct_optab_handler (vec_perm_optab, mode);
+ if (icode != CODE_FOR_nothing)
+ {
+ rtx tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
+ if (tmp)
+ return tmp;
+ }
- icode = direct_optab_handler (vec_perm_const_optab, qimode);
- if (icode != CODE_FOR_nothing)
- {
- tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
- tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
- gen_lowpart (qimode, v1), sel_qi);
- if (tmp)
- return gen_lowpart (mode, tmp);
- }
+ if (qimode != VOIDmode)
+ {
+ icode = direct_optab_handler (vec_perm_optab, qimode);
+ if (icode != CODE_FOR_nothing)
+ {
+ rtx tmp = gen_reg_rtx (qimode);
+ tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),
+ gen_lowpart (qimode, v1), sel_qi);
+ if (tmp)
+ return gen_lowpart (mode, tmp);
}
}
- /* Otherwise expand as a fully variable permuation. */
+ return NULL_RTX;
+}
+
+/* Implement a permutation of vectors v0 and v1 using the permutation
+ vector in SEL and return the result. Use TARGET to hold the result
+ if nonnull and convenient.
+
+ MODE is the mode of the vectors being permuted (V0 and V1).
+ SEL must have the integer equivalent of MODE and is known to be
+ unsuitable for permutes with a constant permutation vector. */
+
+static rtx
+expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
+{
+ enum insn_code icode;
+ unsigned int i, w, u;
+ rtx tmp, sel_qi;
+ rtvec vec;
+
+ w = GET_MODE_SIZE (mode);
+ u = GET_MODE_UNIT_SIZE (mode);
+
+ if (!target || GET_MODE (target) != mode)
+ target = gen_reg_rtx (mode);
+
icode = direct_optab_handler (vec_perm_optab, mode);
if (icode != CODE_FOR_nothing)
{
@@ -5565,51 +5614,48 @@ expand_vec_perm (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
/* As a special case to aid several targets, lower the element-based
permutation to a byte-based permutation and try again. */
- if (qimode == VOIDmode)
+ machine_mode qimode;
+ if (!qimode_for_vec_perm (mode).exists (&qimode))
return NULL_RTX;
icode = direct_optab_handler (vec_perm_optab, qimode);
if (icode == CODE_FOR_nothing)
return NULL_RTX;
- if (sel_qi == NULL)
- {
- /* Multiply each element by its byte size. */
- machine_mode selmode = GET_MODE (sel);
- if (u == 2)
- sel = expand_simple_binop (selmode, PLUS, sel, sel,
- NULL, 0, OPTAB_DIRECT);
- else
- sel = expand_simple_binop (selmode, ASHIFT, sel,
- gen_int_shift_amount (selmode,
- exact_log2 (u)),
- NULL, 0, OPTAB_DIRECT);
- gcc_assert (sel != NULL);
+ /* Multiply each element by its byte size. */
+ machine_mode selmode = GET_MODE (sel);
+ if (u == 2)
+ sel = expand_simple_binop (selmode, PLUS, sel, sel,
+ NULL, 0, OPTAB_DIRECT);
+ else
+ sel = expand_simple_binop (selmode, ASHIFT, sel,
+ gen_int_shift_amount (selmode, exact_log2 (u)),
+ NULL, 0, OPTAB_DIRECT);
+ gcc_assert (sel != NULL);
- /* Broadcast the low byte each element into each of its bytes. */
- vec = rtvec_alloc (w);
- for (i = 0; i < w; ++i)
- {
- int this_e = i / u * u;
- if (BYTES_BIG_ENDIAN)
- this_e += u - 1;
- RTVEC_ELT (vec, i) = GEN_INT (this_e);
- }
- tmp = gen_rtx_CONST_VECTOR (qimode, vec);
- sel = gen_lowpart (qimode, sel);
- sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
- gcc_assert (sel != NULL);
-
- /* Add the byte offset to each byte element. */
- /* Note that the definition of the indicies here is memory ordering,
- so there should be no difference between big and little endian. */
- vec = rtvec_alloc (w);
- for (i = 0; i < w; ++i)
- RTVEC_ELT (vec, i) = GEN_INT (i % u);
- tmp = gen_rtx_CONST_VECTOR (qimode, vec);
- sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
- sel, 0, OPTAB_DIRECT);
- gcc_assert (sel_qi != NULL);
- }
+ /* Broadcast the low byte each element into each of its bytes. */
+ vec = rtvec_alloc (w);
+ for (i = 0; i < w; ++i)
+ {
+ int this_e = i / u * u;
+ if (BYTES_BIG_ENDIAN)
+ this_e += u - 1;
+ RTVEC_ELT (vec, i) = GEN_INT (this_e);
+ }
+ tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+ sel = gen_lowpart (qimode, sel);
+ sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+ gcc_assert (sel != NULL);
+
+ /* Add the byte offset to each byte element. */
+ /* Note that the definition of the indicies here is memory ordering,
+ so there should be no difference between big and little endian. */
+ vec = rtvec_alloc (w);
+ for (i = 0; i < w; ++i)
+ RTVEC_ELT (vec, i) = GEN_INT (i % u);
+ tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+ sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
+ sel, 0, OPTAB_DIRECT);
+ gcc_assert (sel_qi != NULL);
tmp = mode != qimode ? gen_reg_rtx (qimode) : target;
tmp = expand_vec_perm_1 (icode, tmp, gen_lowpart (qimode, v0),