aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Schulze Frielinghaus <stefansf@linux.ibm.com>2023-11-09 09:32:58 +0100
committerStefan Schulze Frielinghaus <stefansf@linux.ibm.com>2023-11-09 09:32:58 +0100
commit27381fb617b299768624a8e11c431a155cd81bba (patch)
tree77c3d1d8c7adc8b7ae10e0904572c0b123594d21
parentfa4dde85af5f08baed60bbb3f8455733edae6afb (diff)
downloadgcc-27381fb617b299768624a8e11c431a155cd81bba.zip
gcc-27381fb617b299768624a8e11c431a155cd81bba.tar.gz
gcc-27381fb617b299768624a8e11c431a155cd81bba.tar.bz2
s390: Recognize further vpdi and vmr{l,h} patterns
Deal with cases where vpdi and vmr{l,h} are still applicable if the operands of those instructions are swapped. For example, currently for

V2DI foo (V2DI x) { return (V2DI) {x[1], x[0]}; }

the assembler sequence

vlgvg %r1,%v24,1
vzero %v0
vlvgg %v0,%r1,0
vmrhg %v24,%v0,%v24

is emitted. With this patch a single vpdi is emitted.

Extensive tests are included in a subsequent patch of this series where more cases are covered.

gcc/ChangeLog:

* config/s390/s390.cc (expand_perm_with_merge): Deal with cases where vmr{l,h} are still applicable if the operands are swapped.
(expand_perm_with_vpdi): Likewise for vpdi.
-rw-r--r--gcc/config/s390/s390.cc118
1 files changed, 90 insertions, 28 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 64f56d8..185eb59 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17532,40 +17532,86 @@ struct expand_vec_perm_d
static bool
expand_perm_with_merge (const struct expand_vec_perm_d &d)
{
- bool merge_lo_p = true;
- bool merge_hi_p = true;
-
- if (d.nelt % 2)
+ static const unsigned char hi_perm_di[2] = {0, 2};
+ static const unsigned char hi_perm_si[4] = {0, 4, 1, 5};
+ static const unsigned char hi_perm_hi[8] = {0, 8, 1, 9, 2, 10, 3, 11};
+ static const unsigned char hi_perm_qi[16]
+ = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
+ /* Merge high with op0/op1 exchanged: each index above moved by nelt.  */
+ static const unsigned char hi_perm_di_swap[2] = {2, 0};
+ static const unsigned char hi_perm_si_swap[4] = {4, 0, 5, 1};
+ static const unsigned char hi_perm_hi_swap[8] = {8, 0, 9, 1, 10, 2, 11, 3};
+ static const unsigned char hi_perm_qi_swap[16]
+ = {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7};
+ /* Merge low: interleaves the second halves, e.g. V4SI { 2, 6, 3, 7 }.  */
+ static const unsigned char lo_perm_di[2] = {1, 3};
+ static const unsigned char lo_perm_si[4] = {2, 6, 3, 7};
+ static const unsigned char lo_perm_hi[8] = {4, 12, 5, 13, 6, 14, 7, 15};
+ static const unsigned char lo_perm_qi[16]
+ = {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
+ /* Merge low with op0/op1 exchanged: each index above moved by nelt.  */
+ static const unsigned char lo_perm_di_swap[2] = {3, 1};
+ static const unsigned char lo_perm_si_swap[4] = {6, 2, 7, 3};
+ static const unsigned char lo_perm_hi_swap[8] = {12, 4, 13, 5, 14, 6, 15, 7};
+ static const unsigned char lo_perm_qi_swap[16]
+ = {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15};
+ /* Which merge matched d.perm, and whether it needs op0/op1 exchanged.  */
+ bool merge_lo_p = false;
+ bool merge_hi_p = false;
+ bool swap_operands_p = false;
+
+ if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi, 16) == 0))
+ {
+ merge_hi_p = true;
+ }
+ else if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di_swap, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, hi_perm_si_swap, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi_swap, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi_swap, 16) == 0))
+ {
+ merge_hi_p = true;
+ swap_operands_p = true;
+ }
+ else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, lo_perm_si, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi, 16) == 0))
+ {
+ merge_lo_p = true;
+ }
+ else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di_swap, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, lo_perm_si_swap, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi_swap, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi_swap, 16) == 0))
+ {
+ merge_lo_p = true;
+ swap_operands_p = true;
+ }
+
+ if (!merge_lo_p && !merge_hi_p)
return false;
- // For V4SI this checks for: { 0, 4, 1, 5 }
- for (int telt = 0; telt < d.nelt; telt++)
- if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt)
- {
- merge_hi_p = false;
- break;
- }
+ if (d.testing_p)
+ return merge_lo_p || merge_hi_p;
- if (!merge_hi_p)
+ rtx op0, op1;
+ if (swap_operands_p)
{
- // For V4SI this checks for: { 2, 6, 3, 7 }
- for (int telt = 0; telt < d.nelt; telt++)
- if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt)
- {
- merge_lo_p = false;
- break;
- }
+ op0 = d.op1;
+ op1 = d.op0;
}
else
- merge_lo_p = false;
-
- if (d.testing_p)
- return merge_lo_p || merge_hi_p;
+ {
+ op0 = d.op0;
+ op1 = d.op1;
+ }
- if (merge_lo_p || merge_hi_p)
- s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);
+ s390_expand_merge (d.target, op0, op1, merge_hi_p);
- return merge_lo_p || merge_hi_p;
+ return true;
}
/* Try to expand the vector permute operation described by D using the
@@ -17582,6 +17628,7 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
{
bool vpdi1_p = false;
bool vpdi4_p = false;
+ bool swap_operands_p = false;
rtx op0_reg, op1_reg;
// Only V2DI and V2DF are supported here.
@@ -17590,11 +17637,20 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
if (d.perm[0] == 0 && d.perm[1] == 3)
vpdi1_p = true;
-
- if ((d.perm[0] == 1 && d.perm[1] == 2)
+ else if (d.perm[0] == 2 && d.perm[1] == 1)
+ {
+ vpdi1_p = true;
+ swap_operands_p = true;
+ }
+ else if ((d.perm[0] == 1 && d.perm[1] == 2)
|| (d.perm[0] == 1 && d.perm[1] == 0)
|| (d.perm[0] == 3 && d.perm[1] == 2))
vpdi4_p = true;
+ else if (d.perm[0] == 3 && d.perm[1] == 0)
+ {
+ vpdi4_p = true;
+ swap_operands_p = true;
+ }
if (!vpdi1_p && !vpdi4_p)
return false;
@@ -17611,6 +17667,12 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
op1_reg = op0_reg;
else if (d.only_op1)
op0_reg = op1_reg;
+ else if (swap_operands_p)
+ {
+ rtx tmp = op0_reg;
+ op0_reg = op1_reg;
+ op1_reg = tmp;
+ }
if (vpdi1_p)
emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));