diff options
author | Andreas Krebbel <krebbel@linux.ibm.com> | 2021-08-04 18:40:11 +0200 |
---|---|---|
committer | Andreas Krebbel <krebbel@linux.ibm.com> | 2021-08-04 18:40:11 +0200 |
commit | 361da782a25031c6ae3967bf8c10a8119845255c (patch) | |
tree | c4e3fc2396f80bf46c2909346bd3dfae2ccd8395 /gcc/config/s390 | |
parent | 6dc8c4656444153c9e2f98d382de39728a849672 (diff) | |
download | gcc-361da782a25031c6ae3967bf8c10a8119845255c.zip gcc-361da782a25031c6ae3967bf8c10a8119845255c.tar.gz gcc-361da782a25031c6ae3967bf8c10a8119845255c.tar.bz2 |
IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vpdi
This patch makes use of the vector permute doubleword immediate
instruction (vpdi) for constant permute vectors.
gcc/ChangeLog:
* config/s390/s390.c (expand_perm_with_vpdi): New function.
(vectorize_vec_perm_const_1): Call expand_perm_with_vpdi.
* config/s390/vector.md (*vpdi1<mode>, @vpdi1<mode>): Enable a
parameterized expander.
(*vpdi4<mode>, @vpdi4<mode>): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/s390/vector/perm-vpdi.c: New test.
Diffstat (limited to 'gcc/config/s390')
-rw-r--r-- | gcc/config/s390/s390.c | 47 | ||||
-rw-r--r-- | gcc/config/s390/vector.md | 5 |
2 files changed, 49 insertions, 3 deletions
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index 8dc805f..673a134 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16979,6 +16979,50 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d) return merge_lo_p || merge_hi_p; } +/* Try to expand the vector permute operation described by D using the + vector permute doubleword immediate instruction vpdi. Return true + if vpdi could be used. + + VPDI allows 4 different immediate values (0, 1, 4, 5). The 0 and 5 + cases are covered by vmrhg and vmrlg already. So we only care + about the 1, 4 cases here. + 1 - First element of src1 and second of src2 + 4 - Second element of src1 and first of src2 */ +static bool +expand_perm_with_vpdi (const struct expand_vec_perm_d &d) +{ + bool vpdi1_p = false; + bool vpdi4_p = false; + rtx op0_reg, op1_reg; + + // Only V2DI and V2DF are supported here. + if (d.nelt != 2) + return false; + + if (d.perm[0] == 0 && d.perm[1] == 3) + vpdi1_p = true; + + if (d.perm[0] == 1 && d.perm[1] == 2) + vpdi4_p = true; + + if (!vpdi1_p && !vpdi4_p) + return false; + + if (d.testing_p) + return true; + + op0_reg = force_reg (GET_MODE (d.op0), d.op0); + op1_reg = force_reg (GET_MODE (d.op1), d.op1); + + if (vpdi1_p) + emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg)); + + if (vpdi4_p) + emit_insn (gen_vpdi4 (d.vmode, d.target, op0_reg, op1_reg)); + + return true; +} + /* Try to find the best sequence for the vector permute operation described by D. Return true if the operation could be expanded. 
*/ @@ -16988,6 +17032,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d) if (expand_perm_with_merge (d)) return true; + if (expand_perm_with_vpdi (d)) + return true; + return false; } diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index d224165..70274a6 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -768,7 +768,7 @@ ; First DW of op1 and second DW of op2 -(define_insn "*vpdi1<mode>" +(define_insn "@vpdi1<mode>" [(set (match_operand:V_HW_2 0 "register_operand" "=v") (vec_select:V_HW_2 (vec_concat:<vec_2x_nelts> @@ -780,7 +780,7 @@ [(set_attr "op_type" "VRR")]) ; Second DW of op1 and first of op2 -(define_insn "*vpdi4<mode>" +(define_insn "@vpdi4<mode>" [(set (match_operand:V_HW_2 0 "register_operand" "=v") (vec_select:V_HW_2 (vec_concat:<vec_2x_nelts> @@ -926,7 +926,6 @@ operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8); }) -; vec_perm_const for V2DI using vpdi? ;; ;; Vector integer arithmetic instructions |