diff options
author | Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> | 2023-08-03 10:30:08 +0200 |
---|---|---|
committer | Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> | 2023-08-03 10:30:08 +0200 |
commit | fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1 (patch) | |
tree | 8e08fd46c48da2a9e54fbf9c5228c3af43c4eae6 /gcc/config/s390 | |
parent | 8ab12576bc0a8547c95580741b9dc7444ec05f39 (diff) | |
download | gcc-fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1.zip gcc-fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1.tar.gz gcc-fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1.tar.bz2 |
s390: Try to emit vlbr/vstbr instead of vperm et al.
gcc/ChangeLog:
* config/s390/s390.cc (expand_perm_as_a_vlbr_vstbr_candidate):
New function which handles bswap patterns for vec_perm_const.
(vectorize_vec_perm_const_1): Call new function.
* config/s390/vector.md (*bswap<mode>): Fix operands in output
template.
(*vstbr<mode>): New insn.
gcc/testsuite/ChangeLog:
* gcc.target/s390/s390.exp: Add subdirectory vxe2.
* gcc.target/s390/vxe2/vlbr-1.c: New test.
* gcc.target/s390/vxe2/vstbr-1.c: New test.
* gcc.target/s390/vxe2/vstbr-2.c: New test.
Diffstat (limited to 'gcc/config/s390')
-rw-r--r-- | gcc/config/s390/s390.cc | 55 | ||||
-rw-r--r-- | gcc/config/s390/vector.md | 16 |
2 files changed, 67 insertions, 4 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 89474fd..6ae81d6 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -17704,6 +17704,58 @@ expand_perm_with_vstbrq (const struct expand_vec_perm_d &d) return false; } +/* Try to emit vlbr/vstbr. Note, this is only a candidate insn since + TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only. Thus, + either fwprop, combine et al. "fixes" one of the input/output operands into + a memory operand or a splitter has to reverse this into a general vperm + operation. */ + +static bool +expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d) +{ + static const char perm[4][MAX_VECT_LEN] + = { { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }, + { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, + { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }, + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } }; + + if (!TARGET_VXE2 || d.vmode != V16QImode || d.op0 != d.op1) + return false; + + if (memcmp (d.perm, perm[0], MAX_VECT_LEN) == 0) + { + rtx target = gen_rtx_SUBREG (V8HImode, d.target, 0); + rtx op0 = gen_rtx_SUBREG (V8HImode, d.op0, 0); + emit_insn (gen_bswapv8hi (target, op0)); + return true; + } + + if (memcmp (d.perm, perm[1], MAX_VECT_LEN) == 0) + { + rtx target = gen_rtx_SUBREG (V4SImode, d.target, 0); + rtx op0 = gen_rtx_SUBREG (V4SImode, d.op0, 0); + emit_insn (gen_bswapv4si (target, op0)); + return true; + } + + if (memcmp (d.perm, perm[2], MAX_VECT_LEN) == 0) + { + rtx target = gen_rtx_SUBREG (V2DImode, d.target, 0); + rtx op0 = gen_rtx_SUBREG (V2DImode, d.op0, 0); + emit_insn (gen_bswapv2di (target, op0)); + return true; + } + + if (memcmp (d.perm, perm[3], MAX_VECT_LEN) == 0) + { + rtx target = gen_rtx_SUBREG (V1TImode, d.target, 0); + rtx op0 = gen_rtx_SUBREG (V1TImode, d.op0, 0); + emit_insn (gen_bswapv1ti (target, op0)); + return true; + } + + return false; +} /* Try to find the best sequence for the vector permute operation described by D. Return true if the operation could be @@ -17726,6 +17778,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d) if (expand_perm_with_rot (d)) return true; + if (expand_perm_as_a_vlbr_vstbr_candidate (d)) + return true; + return false; } diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 21bec72..f0e9ed3 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -47,6 +47,7 @@ (define_mode_iterator VI_HW [V16QI V8HI V4SI V2DI]) (define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI]) (define_mode_iterator VI_HW_HSD [V8HI V4SI V2DI]) +(define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI]) (define_mode_iterator VI_HW_HS [V8HI V4SI]) (define_mode_iterator VI_HW_QH [V16QI V8HI]) @@ -2876,12 +2877,12 @@ (use (match_dup 2))])] "TARGET_VX" { - static char p[4][16] = + static const char p[4][16] = { { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }, /* H */ { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, /* S */ { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }, /* D */ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } }; /* T */ - char *perm; + const char *perm; rtx perm_rtx[16]; switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode))) @@ -2933,8 +2934,8 @@ "TARGET_VXE2" "@ # - vlbr<bhfgq>\t%v0,%v1 - vstbr<bhfgq>\t%v1,%v0" + vlbr<bhfgq>\t%v0,%1 + vstbr<bhfgq>\t%v1,%0" "&& reload_completed && !memory_operand (operands[0], <MODE>mode) && !memory_operand (operands[1], <MODE>mode)" @@ -2947,6 +2948,13 @@ "" [(set_attr "op_type" "*,VRX,VRX")]) +(define_insn "*vstbr<mode>" + [(set (match_operand:VI_HW_HSDT 0 "memory_operand" "=R") + (bswap:VI_HW_HSDT (match_operand:VI_HW_HSDT 1 "register_operand" "v")))] + "TARGET_VXE2" + "vstbr<bhfgq>\t%v1,%0" + [(set_attr "op_type" "VRX")]) + ; ; Implement len_load/len_store optabs with vll/vstl. (define_expand "len_load_v16qi" |