diff options
author | Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> | 2023-11-09 09:32:58 +0100 |
---|---|---|
committer | Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> | 2023-11-09 09:32:58 +0100 |
commit | 27381fb617b299768624a8e11c431a155cd81bba (patch) | |
tree | 77c3d1d8c7adc8b7ae10e0904572c0b123594d21 | |
parent | fa4dde85af5f08baed60bbb3f8455733edae6afb (diff) | |
download | gcc-27381fb617b299768624a8e11c431a155cd81bba.zip gcc-27381fb617b299768624a8e11c431a155cd81bba.tar.gz gcc-27381fb617b299768624a8e11c431a155cd81bba.tar.bz2 |
s390: Recognize further vpdi and vmr{l,h} patterns
Deal with cases where vpdi and vmr{l,h} are still applicable if the
operands of those instructions are swapped. For example, currently for
    V2DI foo (V2DI x)
    {
      return (V2DI) {x[1], x[0]};
    }
the assembler sequence
    vlgvg %r1,%v24,1
    vzero %v0
    vlvgg %v0,%r1,0
    vmrhg %v24,%v0,%v24
is emitted. With this patch, a single vpdi is emitted instead.
Extensive tests are included in a subsequent patch of this series where
more cases are covered.
gcc/ChangeLog:
* config/s390/s390.cc (expand_perm_with_merge): Deal with cases
where vmr{l,h} are still applicable if the operands are swapped.
(expand_perm_with_vpdi): Likewise for vpdi.
-rw-r--r-- | gcc/config/s390/s390.cc | 118 |
1 file changed, 90 insertions, 28 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 64f56d8..185eb59 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -17532,40 +17532,86 @@ struct expand_vec_perm_d static bool expand_perm_with_merge (const struct expand_vec_perm_d &d) { - bool merge_lo_p = true; - bool merge_hi_p = true; - - if (d.nelt % 2) + static const unsigned char hi_perm_di[2] = {0, 2}; + static const unsigned char hi_perm_si[4] = {0, 4, 1, 5}; + static const unsigned char hi_perm_hi[8] = {0, 8, 1, 9, 2, 10, 3, 11}; + static const unsigned char hi_perm_qi[16] + = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}; + + static const unsigned char hi_perm_di_swap[2] = {2, 0}; + static const unsigned char hi_perm_si_swap[4] = {4, 0, 6, 2}; + static const unsigned char hi_perm_hi_swap[8] = {8, 0, 10, 2, 12, 4, 14, 6}; + static const unsigned char hi_perm_qi_swap[16] + = {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}; + + static const unsigned char lo_perm_di[2] = {1, 3}; + static const unsigned char lo_perm_si[4] = {2, 6, 3, 7}; + static const unsigned char lo_perm_hi[8] = {4, 12, 5, 13, 6, 14, 7, 15}; + static const unsigned char lo_perm_qi[16] + = {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}; + + static const unsigned char lo_perm_di_swap[2] = {3, 1}; + static const unsigned char lo_perm_si_swap[4] = {5, 1, 7, 3}; + static const unsigned char lo_perm_hi_swap[8] = {9, 1, 11, 3, 13, 5, 15, 7}; + static const unsigned char lo_perm_qi_swap[16] + = {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}; + + bool merge_lo_p = false; + bool merge_hi_p = false; + bool swap_operands_p = false; + + if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0) + || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0) + || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi, 8) == 0) + || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi, 16) == 0)) + { + merge_hi_p = true; + } + else if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di_swap, 2) == 0) 
+ || (d.nelt == 4 && memcmp (d.perm, hi_perm_si_swap, 4) == 0) + || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi_swap, 8) == 0) + || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi_swap, 16) == 0)) + { + merge_hi_p = true; + swap_operands_p = true; + } + else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di, 2) == 0) + || (d.nelt == 4 && memcmp (d.perm, lo_perm_si, 4) == 0) + || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi, 8) == 0) + || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi, 16) == 0)) + { + merge_lo_p = true; + } + else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di_swap, 2) == 0) + || (d.nelt == 4 && memcmp (d.perm, lo_perm_si_swap, 4) == 0) + || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi_swap, 8) == 0) + || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi_swap, 16) == 0)) + { + merge_lo_p = true; + swap_operands_p = true; + } + + if (!merge_lo_p && !merge_hi_p) return false; - // For V4SI this checks for: { 0, 4, 1, 5 } - for (int telt = 0; telt < d.nelt; telt++) - if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt) - { - merge_hi_p = false; - break; - } + if (d.testing_p) + return merge_lo_p || merge_hi_p; - if (!merge_hi_p) + rtx op0, op1; + if (swap_operands_p) { - // For V4SI this checks for: { 2, 6, 3, 7 } - for (int telt = 0; telt < d.nelt; telt++) - if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt) - { - merge_lo_p = false; - break; - } + op0 = d.op1; + op1 = d.op0; } else - merge_lo_p = false; - - if (d.testing_p) - return merge_lo_p || merge_hi_p; + { + op0 = d.op0; + op1 = d.op1; + } - if (merge_lo_p || merge_hi_p) - s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p); + s390_expand_merge (d.target, op0, op1, merge_hi_p); - return merge_lo_p || merge_hi_p; + return true; } /* Try to expand the vector permute operation described by D using the @@ -17582,6 +17628,7 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d) { bool vpdi1_p = false; bool vpdi4_p = false; + bool swap_operands_p = false; rtx op0_reg, op1_reg; // Only V2DI and V2DF 
are supported here. @@ -17590,11 +17637,20 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d) if (d.perm[0] == 0 && d.perm[1] == 3) vpdi1_p = true; - - if ((d.perm[0] == 1 && d.perm[1] == 2) + else if (d.perm[0] == 2 && d.perm[1] == 1) + { + vpdi1_p = true; + swap_operands_p = true; + } + else if ((d.perm[0] == 1 && d.perm[1] == 2) || (d.perm[0] == 1 && d.perm[1] == 0) || (d.perm[0] == 3 && d.perm[1] == 2)) vpdi4_p = true; + else if (d.perm[0] == 3 && d.perm[1] == 0) + { + vpdi4_p = true; + swap_operands_p = true; + } if (!vpdi1_p && !vpdi4_p) return false; @@ -17611,6 +17667,12 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d) op1_reg = op0_reg; else if (d.only_op1) op0_reg = op1_reg; + else if (swap_operands_p) + { + rtx tmp = op0_reg; + op0_reg = op1_reg; + op1_reg = tmp; + } if (vpdi1_p) emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg)); |