aboutsummaryrefslogtreecommitdiff
path: root/gcc/optabs.c
diff options
context:
space:
mode:
authorRichard Henderson <rth@redhat.com>2011-10-25 14:29:48 -0700
committerRichard Henderson <rth@gcc.gnu.org>2011-10-25 14:29:48 -0700
commit22e4dee74f73108b7dda295f6999276be12b7568 (patch)
tree646bec23201b08685d3560602023733c6d9f7259 /gcc/optabs.c
parentc3962b13f70898c51173b9f3a6bb1e369a84b459 (diff)
downloadgcc-22e4dee74f73108b7dda295f6999276be12b7568.zip
gcc-22e4dee74f73108b7dda295f6999276be12b7568.tar.gz
gcc-22e4dee74f73108b7dda295f6999276be12b7568.tar.bz2
Change vec_perm checking and expansion level.
The can_vec_perm_p interface changed to use a C integer array. This allows easy re-use from the rtl level and the gimple level within the vectorizer. It allows both to determine if a given permutation is (un-)supported without having to create tree/rtl garbage. The expand_vec_perm interface changed to use rtl. This allows easy re-use from the rtl level, so that expand_vec_perm can be used in the fallback implementation of other optabs. * target.def (vec_perm_const_ok): Change parameters to mode and array of indices. * doc/tm.texi: Rebuild. * config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change parameters to mode and array of indices. * expr.c (expand_expr_real_2) [VEC_PERM_EXPR]: Expand operands here. * optabs.c (can_vec_perm_p): Rename from can_vec_perm_expr_p. Change parameters to mode and array of indices. (expand_vec_perm_1): Rename from expand_vec_perm_expr_1. (expand_vec_perm): Rename from expand_vec_perm_expr. Change parameters to mode and rtx inputs. Try lowering to QImode vec_perm_const before trying fully variable permutation. * optabs.h: Update decls. * tree-vect-generic.c (lower_vec_perm): Extract array of indices from VECTOR_CST to pass to can_vec_perm_p. * tree-vect-slp.c (vect_get_mask_element): Change mask parameter type from int pointer to unsigned char pointer. (vect_transform_slp_perm_load): Update for change to can_vec_perm_p. * tree-vect-stmts.c (perm_mask_for_reverse): Likewise. From-SVN: r180449
Diffstat (limited to 'gcc/optabs.c')
-rw-r--r--gcc/optabs.c215
1 files changed, 115 insertions, 100 deletions
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 5036856..26669f4 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6701,20 +6701,22 @@ vector_compare_rtx (tree cond, bool unsignedp, enum insn_code icode)
of the CPU. SEL may be NULL, which stands for an unknown constant. */
bool
-can_vec_perm_expr_p (tree type, tree sel)
+can_vec_perm_p (enum machine_mode mode, bool variable,
+ const unsigned char *sel)
{
- enum machine_mode mode, qimode;
- mode = TYPE_MODE (type);
+ enum machine_mode qimode;
/* If the target doesn't implement a vector mode for the vector type,
then no operations are supported. */
if (!VECTOR_MODE_P (mode))
return false;
- if (sel == NULL || TREE_CODE (sel) == VECTOR_CST)
+ if (!variable)
{
if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
- && (sel == NULL || targetm.vectorize.vec_perm_const_ok (type, sel)))
+ && (sel == NULL
+ || targetm.vectorize.vec_perm_const_ok == NULL
+ || targetm.vectorize.vec_perm_const_ok (mode, sel)))
return true;
}
@@ -6722,6 +6724,8 @@ can_vec_perm_expr_p (tree type, tree sel)
return true;
/* We allow fallback to a QI vector mode, and adjust the mask. */
+ if (GET_MODE_INNER (mode) == QImode)
+ return false;
qimode = mode_for_vector (QImode, GET_MODE_SIZE (mode));
if (!VECTOR_MODE_P (qimode))
return false;
@@ -6732,9 +6736,9 @@ can_vec_perm_expr_p (tree type, tree sel)
if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing)
return false;
- /* In order to support the lowering of non-constant permutations,
+ /* In order to support the lowering of variable permutations,
we need to support shifts and adds. */
- if (sel != NULL && TREE_CODE (sel) != VECTOR_CST)
+ if (variable)
{
if (GET_MODE_UNIT_SIZE (mode) > 2
&& optab_handler (ashl_optab, mode) == CODE_FOR_nothing
@@ -6747,11 +6751,11 @@ can_vec_perm_expr_p (tree type, tree sel)
return true;
}
-/* A subroutine of expand_vec_perm_expr for expanding one vec_perm insn. */
+/* A subroutine of expand_vec_perm for expanding one vec_perm insn. */
static rtx
-expand_vec_perm_expr_1 (enum insn_code icode, rtx target,
- rtx v0, rtx v1, rtx sel)
+expand_vec_perm_1 (enum insn_code icode, rtx target,
+ rtx v0, rtx v1, rtx sel)
{
enum machine_mode tmode = GET_MODE (target);
enum machine_mode smode = GET_MODE (sel);
@@ -6783,119 +6787,130 @@ expand_vec_perm_expr_1 (enum insn_code icode, rtx target,
return NULL_RTX;
}
-/* Generate instructions for VEC_PERM_EXPR given its type and three
- operands. */
+/* Generate instructions for vec_perm optab given its mode
+ and three operands. */
+
rtx
-expand_vec_perm_expr (tree type, tree v0, tree v1, tree sel, rtx target)
+expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
{
enum insn_code icode;
- enum machine_mode mode = TYPE_MODE (type);
enum machine_mode qimode;
- rtx v0_rtx, v1_rtx, sel_rtx, *vec, vt, tmp;
unsigned int i, w, e, u;
+ rtx tmp, sel_qi;
+ rtvec vec;
- if (!target)
+ if (!target || GET_MODE (target) != mode)
target = gen_reg_rtx (mode);
- v0_rtx = expand_normal (v0);
- if (operand_equal_p (v0, v1, 0))
- v1_rtx = v0_rtx;
- else
- v1_rtx = expand_normal (v1);
- sel_rtx = expand_normal (sel);
+
+ w = GET_MODE_SIZE (mode);
+ e = GET_MODE_NUNITS (mode);
+ u = GET_MODE_UNIT_SIZE (mode);
+
+ /* Set QIMODE to a different vector mode with byte elements.
+ If no such mode, or if MODE already has byte elements, use VOIDmode. */
+ qimode = VOIDmode;
+ if (GET_MODE_INNER (mode) != QImode)
+ {
+ qimode = mode_for_vector (QImode, w);
+ if (!VECTOR_MODE_P (qimode))
+ qimode = VOIDmode;
+ }
/* If the input is a constant, expand it specially. */
- if (CONSTANT_P (sel_rtx))
+ if (CONSTANT_P (sel))
{
icode = direct_optab_handler (vec_perm_const_optab, mode);
- if (icode != CODE_FOR_nothing
- && targetm.vectorize.vec_perm_const_ok (TREE_TYPE (v0), sel)
- && (tmp = expand_vec_perm_expr_1 (icode, target, v0_rtx,
- v1_rtx, sel_rtx)) != NULL)
- return tmp;
+ if (icode != CODE_FOR_nothing)
+ {
+ tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
+ if (tmp)
+ return tmp;
+ }
+
+ /* Fall back to a constant byte-based permutation. */
+ if (qimode != VOIDmode)
+ {
+ icode = direct_optab_handler (vec_perm_const_optab, qimode);
+ if (icode != CODE_FOR_nothing)
+ {
+ vec = rtvec_alloc (w);
+ for (i = 0; i < e; ++i)
+ {
+ unsigned int j, this_e;
+
+ this_e = INTVAL (XVECEXP (sel, 0, i));
+ this_e &= 2 * e - 1;
+ this_e *= u;
+
+ for (j = 0; j < u; ++j)
+ RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
+ }
+ sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
+
+ tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
+ gen_lowpart (qimode, v0),
+ gen_lowpart (qimode, v1), sel_qi);
+ if (tmp)
+ return gen_lowpart (mode, tmp);
+ }
+ }
}
- /* Otherwise fall back to a fully variable permuation. */
+ /* Otherwise expand as a fully variable permuation. */
icode = direct_optab_handler (vec_perm_optab, mode);
- if (icode != CODE_FOR_nothing
- && (tmp = expand_vec_perm_expr_1 (icode, target, v0_rtx,
- v1_rtx, sel_rtx)) != NULL)
- return tmp;
+ if (icode != CODE_FOR_nothing)
+ {
+ tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
+ if (tmp)
+ return tmp;
+ }
/* As a special case to aid several targets, lower the element-based
permutation to a byte-based permutation and try again. */
- qimode = mode_for_vector (QImode, GET_MODE_SIZE (mode));
- if (!VECTOR_MODE_P (qimode))
+ if (qimode == VOIDmode)
return NULL_RTX;
-
- /* ??? For completeness, we ought to check the QImode version of
- vec_perm_const_optab. But all users of this implicit lowering
- feature implement the variable vec_perm_optab. */
icode = direct_optab_handler (vec_perm_optab, qimode);
if (icode == CODE_FOR_nothing)
return NULL_RTX;
- w = GET_MODE_SIZE (mode);
- e = GET_MODE_NUNITS (mode);
- u = GET_MODE_UNIT_SIZE (mode);
- vec = XALLOCAVEC (rtx, w);
-
- if (CONSTANT_P (sel_rtx))
- {
- unsigned int j;
- for (i = 0; i < e; ++i)
- {
- unsigned int this_e = INTVAL (XVECEXP (sel_rtx, 0, i));
- this_e &= 2 * e - 1;
- this_e *= u;
-
- for (j = 0; j < u; ++j)
- vec[i * u + j] = GEN_INT (this_e + j);
- }
- sel_rtx = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec));
- }
+ /* Multiply each element by its byte size. */
+ if (u == 2)
+ sel = expand_simple_binop (mode, PLUS, sel, sel, sel, 0, OPTAB_DIRECT);
else
- {
- /* Multiply each element by its byte size. */
- if (u == 2)
- sel_rtx = expand_simple_binop (mode, PLUS, sel_rtx, sel_rtx,
- sel_rtx, 0, OPTAB_DIRECT);
- else
- sel_rtx = expand_simple_binop (mode, ASHIFT, sel_rtx,
- GEN_INT (exact_log2 (u)),
- sel_rtx, 0, OPTAB_DIRECT);
- gcc_assert (sel_rtx);
-
- /* Broadcast the low byte each element into each of its bytes. */
- for (i = 0; i < w; ++i)
- {
- int this_e = i / u * u;
- if (BYTES_BIG_ENDIAN)
- this_e += u - 1;
- vec[i] = GEN_INT (this_e);
- }
- vt = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec));
- sel_rtx = gen_lowpart (qimode, sel_rtx);
- sel_rtx = expand_vec_perm_expr_1 (icode, gen_reg_rtx (qimode),
- sel_rtx, sel_rtx, vt);
- gcc_assert (sel_rtx != NULL);
-
- /* Add the byte offset to each byte element. */
- /* Note that the definition of the indicies here is memory ordering,
- so there should be no difference between big and little endian. */
- for (i = 0; i < w; ++i)
- vec[i] = GEN_INT (i % u);
- vt = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec));
- sel_rtx = expand_simple_binop (qimode, PLUS, sel_rtx, vt,
- NULL_RTX, 0, OPTAB_DIRECT);
- gcc_assert (sel_rtx);
- }
-
- tmp = expand_vec_perm_expr_1 (icode, gen_lowpart (qimode, target),
- gen_lowpart (qimode, v0_rtx),
- gen_lowpart (qimode, v1_rtx), sel_rtx);
- gcc_assert (tmp != NULL);
-
- return gen_lowpart (mode, tmp);
+ sel = expand_simple_binop (mode, ASHIFT, sel, GEN_INT (exact_log2 (u)),
+ sel, 0, OPTAB_DIRECT);
+ gcc_assert (sel != NULL);
+
+ /* Broadcast the low byte each element into each of its bytes. */
+ vec = rtvec_alloc (w);
+ for (i = 0; i < w; ++i)
+ {
+ int this_e = i / u * u;
+ if (BYTES_BIG_ENDIAN)
+ this_e += u - 1;
+ RTVEC_ELT (vec, i) = GEN_INT (this_e);
+ }
+ tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+ sel = gen_lowpart (qimode, sel);
+ sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+ gcc_assert (sel != NULL);
+
+ /* Add the byte offset to each byte element. */
+ /* Note that the definition of the indicies here is memory ordering,
+ so there should be no difference between big and little endian. */
+ vec = rtvec_alloc (w);
+ for (i = 0; i < w; ++i)
+ RTVEC_ELT (vec, i) = GEN_INT (i % u);
+ tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+ sel = expand_simple_binop (qimode, PLUS, sel, tmp, sel, 0, OPTAB_DIRECT);
+ gcc_assert (sel != NULL);
+
+ tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
+ gen_lowpart (qimode, v0),
+ gen_lowpart (qimode, v1), sel);
+ if (tmp)
+ tmp = gen_lowpart (mode, tmp);
+ return tmp;
}