aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/s390
diff options
context:
space:
mode:
author: Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> 2023-08-03 10:30:08 +0200
committer: Stefan Schulze Frielinghaus <stefansf@linux.ibm.com> 2023-08-03 10:30:08 +0200
commit: fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1 (patch)
tree: 8e08fd46c48da2a9e54fbf9c5228c3af43c4eae6 /gcc/config/s390
parent: 8ab12576bc0a8547c95580741b9dc7444ec05f39 (diff)
download: gcc-fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1.zip
gcc-fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1.tar.gz
gcc-fab08d12b40ad637c5a4ce8e026fb43cd3f0fad1.tar.bz2
s390: Try to emit vlbr/vstbr instead of vperm et al.
gcc/ChangeLog:

	* config/s390/s390.cc (expand_perm_as_a_vlbr_vstbr_candidate): New function which handles bswap patterns for vec_perm_const.
	(vectorize_vec_perm_const_1): Call new function.
	* config/s390/vector.md (*bswap<mode>): Fix operands in output template.
	(*vstbr<mode>): New insn.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/s390.exp: Add subdirectory vxe2.
	* gcc.target/s390/vxe2/vlbr-1.c: New test.
	* gcc.target/s390/vxe2/vstbr-1.c: New test.
	* gcc.target/s390/vxe2/vstbr-2.c: New test.
Diffstat (limited to 'gcc/config/s390')
-rw-r--r-- gcc/config/s390/s390.cc 55
-rw-r--r-- gcc/config/s390/vector.md 16
2 files changed, 67 insertions, 4 deletions
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 89474fd..6ae81d6 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17704,6 +17704,58 @@ expand_perm_with_vstbrq (const struct expand_vec_perm_d &d)
return false;
}
+/* Try to emit vlbr/vstbr for the permutation described by D. D must be a V16QI permutation whose
+ two inputs are the same rtx and whose selector reverses the bytes of every 2-, 4-, 8- or 16-byte
+ element; return true iff a bswap insn was emitted. Note, this is only a candidate insn since
+ TARGET_VECTORIZE_VEC_PERM_CONST operates on vector registers only. Thus, either fwprop, combine et al.
+ fix one of the input/output operands into a memory operand, or a splitter has to reverse this into a general vperm operation. */
+
+static bool
+expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d)
+{
+ static const char perm[4][MAX_VECT_LEN]
+ = { { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }, /* bytes reversed per 2-byte element */
+ { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, /* bytes reversed per 4-byte element */
+ { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }, /* bytes reversed per 8-byte element */
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } }; /* all 16 bytes reversed */
+ /* Bail out unless VXE2 is enabled, the mode is V16QI and both inputs are the same rtx. */
+ if (!TARGET_VXE2 || d.vmode != V16QImode || d.op0 != d.op1)
+ return false;
+ /* perm[0]: wrap target and input in V8HI subregs at offset 0 and emit bswapv8hi. */
+ if (memcmp (d.perm, perm[0], MAX_VECT_LEN) == 0)
+ {
+ rtx target = gen_rtx_SUBREG (V8HImode, d.target, 0);
+ rtx op0 = gen_rtx_SUBREG (V8HImode, d.op0, 0);
+ emit_insn (gen_bswapv8hi (target, op0));
+ return true;
+ }
+ /* perm[1]: same approach with V4SI subregs and bswapv4si. */
+ if (memcmp (d.perm, perm[1], MAX_VECT_LEN) == 0)
+ {
+ rtx target = gen_rtx_SUBREG (V4SImode, d.target, 0);
+ rtx op0 = gen_rtx_SUBREG (V4SImode, d.op0, 0);
+ emit_insn (gen_bswapv4si (target, op0));
+ return true;
+ }
+ /* perm[2]: same approach with V2DI subregs and bswapv2di. */
+ if (memcmp (d.perm, perm[2], MAX_VECT_LEN) == 0)
+ {
+ rtx target = gen_rtx_SUBREG (V2DImode, d.target, 0);
+ rtx op0 = gen_rtx_SUBREG (V2DImode, d.op0, 0);
+ emit_insn (gen_bswapv2di (target, op0));
+ return true;
+ }
+ /* perm[3]: same approach with V1TI subregs and bswapv1ti. */
+ if (memcmp (d.perm, perm[3], MAX_VECT_LEN) == 0)
+ {
+ rtx target = gen_rtx_SUBREG (V1TImode, d.target, 0);
+ rtx op0 = gen_rtx_SUBREG (V1TImode, d.op0, 0);
+ emit_insn (gen_bswapv1ti (target, op0));
+ return true;
+ }
+ /* The selector matches none of the four byte-reversal patterns; nothing was emitted. */
+ return false;
+}
/* Try to find the best sequence for the vector permute operation
described by D. Return true if the operation could be
@@ -17726,6 +17778,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
if (expand_perm_with_rot (d))
return true;
+ if (expand_perm_as_a_vlbr_vstbr_candidate (d))
+ return true;
+
return false;
}
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 21bec72..f0e9ed3 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -47,6 +47,7 @@
(define_mode_iterator VI_HW [V16QI V8HI V4SI V2DI])
(define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
(define_mode_iterator VI_HW_HSD [V8HI V4SI V2DI])
+(define_mode_iterator VI_HW_HSDT [V8HI V4SI V2DI V1TI TI])
(define_mode_iterator VI_HW_HS [V8HI V4SI])
(define_mode_iterator VI_HW_QH [V16QI V8HI])
@@ -2876,12 +2877,12 @@
(use (match_dup 2))])]
"TARGET_VX"
{
- static char p[4][16] =
+ static const char p[4][16] =
{ { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }, /* H */
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, /* S */
{ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }, /* D */
{ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 } }; /* T */
- char *perm;
+ const char *perm;
rtx perm_rtx[16];
switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
@@ -2933,8 +2934,8 @@
"TARGET_VXE2"
"@
#
- vlbr<bhfgq>\t%v0,%v1
- vstbr<bhfgq>\t%v1,%v0"
+ vlbr<bhfgq>\t%v0,%1
+ vstbr<bhfgq>\t%v1,%0"
"&& reload_completed
&& !memory_operand (operands[0], <MODE>mode)
&& !memory_operand (operands[1], <MODE>mode)"
@@ -2947,6 +2948,13 @@
""
[(set_attr "op_type" "*,VRX,VRX")])
+(define_insn "*vstbr<mode>" ; Store-only bswap pattern: register source, memory destination.
+ [(set (match_operand:VI_HW_HSDT 0 "memory_operand" "=R") ; operand 0: memory destination
+ (bswap:VI_HW_HSDT (match_operand:VI_HW_HSDT 1 "register_operand" "v")))] ; operand 1: register source, constraint v
+ "TARGET_VXE2" ; pattern is enabled only when TARGET_VXE2 holds
+ "vstbr<bhfgq>\t%v1,%0" ; prints operand 1 with the v modifier, operand 0 as a plain operand
+ [(set_attr "op_type" "VRX")])
+
;
; Implement len_load/len_store optabs with vll/vstl.
(define_expand "len_load_v16qi"