aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Andreas Krebbel <krebbel@linux.ibm.com> 2021-08-04 18:40:11 +0200
committer Andreas Krebbel <krebbel@linux.ibm.com> 2021-08-04 18:40:11 +0200
commit 361da782a25031c6ae3967bf8c10a8119845255c (patch)
tree c4e3fc2396f80bf46c2909346bd3dfae2ccd8395 /gcc
parent 6dc8c4656444153c9e2f98d382de39728a849672 (diff)
download gcc-361da782a25031c6ae3967bf8c10a8119845255c.zip
gcc-361da782a25031c6ae3967bf8c10a8119845255c.tar.gz
gcc-361da782a25031c6ae3967bf8c10a8119845255c.tar.bz2
IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vpdi
This patch makes use of the vector permute doubleword immediate
instruction (vpdi) for constant permute vectors.

gcc/ChangeLog:

	* config/s390/s390.c (expand_perm_with_vpdi): New function.
	(vectorize_vec_perm_const_1): Call expand_perm_with_vpdi.
	* config/s390/vector.md (*vpdi1<mode>, @vpdi1<mode>): Enable a
	parameterized expander.
	(*vpdi4<mode>, @vpdi4<mode>): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/vector/perm-vpdi.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/s390/s390.c 47
-rw-r--r-- gcc/config/s390/vector.md 5
-rw-r--r-- gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c 49
3 files changed, 98 insertions, 3 deletions
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 8dc805f..673a134 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16979,6 +16979,50 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
return merge_lo_p || merge_hi_p;
}
+/* Try to expand the constant vector permute described by D with a single
+ vector permute doubleword immediate instruction (vpdi). Return true
+ if vpdi could be used; nothing is emitted when D.testing_p is set.
+
+ VPDI allows 4 different immediate values (0, 1, 4, 5). The 0 and 5
+ cases are covered by vmrhg and vmrlg already. So we only care
+ about the 1, 4 cases here.
+ 1 - First element of src1 and second of src2 (D.perm is {0, 3})
+ 4 - Second element of src1 and first of src2 (D.perm is {1, 2}) */
+static bool
+expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
+{
+ bool vpdi1_p = false;
+ bool vpdi4_p = false;
+ rtx op0_reg, op1_reg;
+
+ // Only two-element vectors (V2DI and V2DF) are supported here.
+ if (d.nelt != 2)
+ return false;
+
+ if (d.perm[0] == 0 && d.perm[1] == 3) // Immediate 1.
+ vpdi1_p = true;
+
+ if (d.perm[0] == 1 && d.perm[1] == 2) // Immediate 4.
+ vpdi4_p = true;
+
+ if (!vpdi1_p && !vpdi4_p) // Any other selector is not handled here.
+ return false;
+
+ if (d.testing_p) // Test mode: report success, emit nothing.
+ return true;
+
+ op0_reg = force_reg (GET_MODE (d.op0), d.op0); // Inputs go into registers.
+ op1_reg = force_reg (GET_MODE (d.op1), d.op1);
+
+ if (vpdi1_p)
+ emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));
+
+ if (vpdi4_p)
+ emit_insn (gen_vpdi4 (d.vmode, d.target, op0_reg, op1_reg));
+
+ return true;
+}
+
/* Try to find the best sequence for the vector permute operation
described by D. Return true if the operation could be
expanded. */
@@ -16988,6 +17032,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
if (expand_perm_with_merge (d))
return true;
+ if (expand_perm_with_vpdi (d))
+ return true;
+
return false;
}
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index d224165..70274a6 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -768,7 +768,7 @@
; First DW of op1 and second DW of op2
-(define_insn "*vpdi1<mode>"
+(define_insn "@vpdi1<mode>"
[(set (match_operand:V_HW_2 0 "register_operand" "=v")
(vec_select:V_HW_2
(vec_concat:<vec_2x_nelts>
@@ -780,7 +780,7 @@
[(set_attr "op_type" "VRR")])
; Second DW of op1 and first of op2
-(define_insn "*vpdi4<mode>"
+(define_insn "@vpdi4<mode>"
[(set (match_operand:V_HW_2 0 "register_operand" "=v")
(vec_select:V_HW_2
(vec_concat:<vec_2x_nelts>
@@ -926,7 +926,6 @@
operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
})
-; vec_perm_const for V2DI using vpdi?
;;
;; Vector integer arithmetic instructions
diff --git a/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c b/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c
new file mode 100644
index 0000000..cc92531
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/perm-vpdi.c
@@ -0,0 +1,49 @@
+/* { dg-do run { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
+
+/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvpdi\t" 6 } } */
+
+#include "vec-types.h"
+#include <vecintrin.h>
+/* Define permi_<BITS>_<VEC_TYPE> (a, b): { a[BITS bit 1], b[BITS bit 0] }. */
+#define GEN_PERMI_BITS(VEC_TYPE, BITS) \
+ VEC_TYPE __attribute__((noinline)) \
+ permi_##BITS##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \
+ return (VEC_TYPE){a[((BITS) & 2) >> 1], b[(BITS) & 1] }; }
+/* Define the permi functions for all four BITS values (0..3) of VEC_TYPE. */
+#define GEN_PERMI(VEC_TYPE) \
+ GEN_PERMI_BITS(VEC_TYPE, 0); \
+ GEN_PERMI_BITS(VEC_TYPE, 1); \
+ GEN_PERMI_BITS(VEC_TYPE, 2); \
+ GEN_PERMI_BITS(VEC_TYPE, 3); \
+
+GEN_PERMI(v2di) /* Defines permi_0_v2di .. permi_3_v2di. */
+GEN_PERMI(uv2di) /* Likewise for uv2di. */
+GEN_PERMI(v2df) /* Likewise for v2df. */
+/* Call permi_<BITS>_<VEC_TYPE> on the a and b of the enclosing scope and
+ abort unless the result is { bit 1 of BITS, (bit 0 of BITS) + 2 }. */
+#define CHECK_PERMI_BITS(VEC_TYPE, BITS) \
+ VEC_TYPE r##BITS = permi_##BITS##_##VEC_TYPE (a, b); \
+ if (r##BITS[0] != ((BITS) & 2) >> 1 \
+ || r##BITS[1] != ((BITS) & 1) + 2) \
+ __builtin_abort();
+/* Run the BITS = 0..3 checks; a, b presumably {0, 1}, {2, 3} (GEN_SEQ_VEC). */
+#define CHECK_PERMI(VEC_TYPE) \
+ { \
+ VEC_TYPE a = GEN_SEQ_VEC (VEC_TYPE, 0); \
+ VEC_TYPE b = GEN_SEQ_VEC (VEC_TYPE, 2); \
+ CHECK_PERMI_BITS (VEC_TYPE, 0); \
+ CHECK_PERMI_BITS (VEC_TYPE, 1); \
+ CHECK_PERMI_BITS (VEC_TYPE, 2); \
+ CHECK_PERMI_BITS (VEC_TYPE, 3); \
+ }
+/* Run the runtime checks for each of the three vector types. */
+int
+main ()
+{
+ CHECK_PERMI (v2di);
+ CHECK_PERMI (uv2di);
+ CHECK_PERMI (v2df);
+}