author    liuhongt <hongtao.liu@intel.com>    2021-06-15 16:25:16 +0800
committer liuhongt <hongtao.liu@intel.com>    2021-06-21 16:05:35 +0800
commit    08c85f609a73fb36fdcbd9f327a5a645c20ac816 (patch)
tree      bd9925a840b740531843f8c0697519c2e5fe8006
parent    9cedbaab8e048b90ceb9ceef0d851385fae67cde (diff)
Disparage slightly the mask register alternative for bitwise operations.
AVX512 supports bitwise operations with mask registers, but the throughput of
those instructions is much lower than that of the corresponding GPR versions,
so additionally disparage slightly the mask register alternatives for bitwise
operations in the LRA.  Also, when the allocno cost of GENERAL_REGS is the
same as that of MASK_REGS, allocate MASK_REGS first since it has already been
disparaged.

gcc/ChangeLog:

	PR target/101142
	* config/i386/i386.md (*anddi_1): Disparage slightly the mask
	register alternative.
	(*and<mode>_1): Ditto.
	(*andqi_1): Ditto.
	(*andn<mode>_1): Ditto.
	(*<code><mode>_1): Ditto.
	(*<code>qi_1): Ditto.
	(*one_cmpl<mode>2_1): Ditto.
	(*one_cmplsi2_1_zext): Ditto.
	(*one_cmplqi2_1): Ditto.
	* config/i386/i386.c (x86_order_regs_for_local_alloc): Change
	the order of mask registers to be before general registers.

gcc/testsuite/ChangeLog:

	PR target/101142
	* gcc.target/i386/spill_to_mask-1.c: Adjust testcase.
	* gcc.target/i386/spill_to_mask-2.c: Adjust testcase.
	* gcc.target/i386/spill_to_mask-3.c: Adjust testcase.
	* gcc.target/i386/spill_to_mask-4.c: Adjust testcase.
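As a rough illustration of the intended effect (my own sketch, not one of the
testcases in this patch; assumed flags -O2 -march=skylake-avx512), scalar
bitwise code like the following should be compiled with GPR and/xor/not rather
than the lower-throughput kand/kxor/knot, even when the register allocator
spills some values to mask registers:

/* Hypothetical example: expect and/xor/not on GPRs and no k-prefixed
   bitwise instructions in the generated assembly.  */
unsigned int
mix (unsigned int a, unsigned int b, unsigned int c)
{
  return (a & b) ^ ~c;
}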
-rw-r--r--  gcc/config/i386/i386.c                             13
-rw-r--r--  gcc/config/i386/i386.md                            20
-rw-r--r--  gcc/testsuite/gcc.target/i386/spill_to_mask-1.c    89
-rw-r--r--  gcc/testsuite/gcc.target/i386/spill_to_mask-2.c    11
-rw-r--r--  gcc/testsuite/gcc.target/i386/spill_to_mask-3.c    11
-rw-r--r--  gcc/testsuite/gcc.target/i386/spill_to_mask-4.c    11
6 files changed, 96 insertions, 59 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7d0d414..c3740ff 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20475,6 +20475,15 @@ x86_order_regs_for_local_alloc (void)
int pos = 0;
int i;
+ /* When the allocno cost of GENERAL_REGS is the same as that of MASK_REGS,
+ allocate MASK_REGS first, since it has already been disparaged.  This is
+ for testcase bitwise_mask_op3.c, where the inputs are allocated as mask
+ registers, so mask bitwise instructions should be used there.
+ Refer to PR101142. */
+ /* Mask register. */
+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
+ reg_alloc_order [pos++] = i;
+
/* First allocate the local general purpose registers. */
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
@@ -20501,10 +20510,6 @@ x86_order_regs_for_local_alloc (void)
for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
reg_alloc_order [pos++] = i;
- /* Mask register. */
- for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
- reg_alloc_order [pos++] = i;
-
/* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
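For context, a minimal sketch of the scenario the new comment refers to (an
approximation of the bitwise_mask_op3.c case, not the testcase itself; assumes
AVX512F and <immintrin.h>): both operands of the bitwise AND already live in
mask registers, so with MASK_REGS ordered first the operation should stay
there as a kandw instead of bouncing through a GPR.

#include <immintrin.h>

/* Both compare results are produced in mask registers; the AND of the two
   masks is expected to remain a mask-register instruction (kandw).  */
__mmask16
foo (__m512i a, __m512i b, __m512i c, __m512i d)
{
  __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
  __mmask16 m2 = _mm512_cmpeq_epi32_mask (c, d);
  return m1 & m2;
}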
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2333261..9116828 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9138,7 +9138,7 @@
})
(define_insn "*anddi_1"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,k")
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k")
(and:DI
(match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k")
(match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k")))
@@ -9226,7 +9226,7 @@
(set_attr "mode" "SI")])
(define_insn "*and<mode>_1"
- [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,k")
+ [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k")
(and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k")
(match_operand:SWI24 2 "<general_operand>" "r<i>,m,L,k")))
(clobber (reg:CC FLAGS_REG))]
@@ -9255,7 +9255,7 @@
(set_attr "mode" "<MODE>,<MODE>,SI,<MODE>")])
(define_insn "*andqi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
(match_operand:QI 2 "general_operand" "qn,m,rn,k")))
(clobber (reg:CC FLAGS_REG))]
@@ -9651,7 +9651,7 @@
})
(define_insn "*andn<mode>_1"
- [(set (match_operand:SWI48 0 "register_operand" "=r,r,k")
+ [(set (match_operand:SWI48 0 "register_operand" "=r,r,?k")
(and:SWI48
(not:SWI48 (match_operand:SWI48 1 "register_operand" "r,r,k"))
(match_operand:SWI48 2 "nonimmediate_operand" "r,m,k")))
@@ -9667,7 +9667,7 @@
(set_attr "mode" "<MODE>")])
(define_insn "*andn<mode>_1"
- [(set (match_operand:SWI12 0 "register_operand" "=r,k")
+ [(set (match_operand:SWI12 0 "register_operand" "=r,?k")
(and:SWI12
(not:SWI12 (match_operand:SWI12 1 "register_operand" "r,k"))
(match_operand:SWI12 2 "register_operand" "r,k")))
@@ -9757,7 +9757,7 @@
})
(define_insn "*<code><mode>_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k")
(any_or:SWI248
(match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k")
(match_operand:SWI248 2 "<general_operand>" "r<i>,m,k")))
@@ -9847,7 +9847,7 @@
(set_attr "mode" "SI")])
(define_insn "*<code>qi_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k")
(any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k")
(match_operand:QI 2 "general_operand" "qn,m,rn,k")))
(clobber (reg:CC FLAGS_REG))]
@@ -10603,7 +10603,7 @@
"split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")
(define_insn "*one_cmpl<mode>2_1"
- [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,k")
+ [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,?k")
(not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))]
"ix86_unary_operator_ok (NOT, <MODE>mode, operands)"
"@
@@ -10620,7 +10620,7 @@
(set_attr "mode" "<MODE>")])
(define_insn "*one_cmplsi2_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,k")
+ [(set (match_operand:DI 0 "register_operand" "=r,?k")
(zero_extend:DI
(not:SI (match_operand:SI 1 "register_operand" "0,k"))))]
"TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)"
@@ -10632,7 +10632,7 @@
(set_attr "mode" "SI,SI")])
(define_insn "*one_cmplqi2_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,k")
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,?k")
(not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))]
"ix86_unary_operator_ok (NOT, QImode, operands)"
"@
diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
index c5043e2..94d6764 100644
--- a/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
+++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
@@ -1,15 +1,31 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u32
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE32" } */
typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned short u16;
typedef unsigned char u8;
+#ifdef DTYPE32
+typedef u32 DTYPE;
+#define byteswap byteswapu32
+#endif
+
+#ifdef DTYPE16
+typedef u16 DTYPE;
+#define byteswap byteswapu16
+#endif
+
+#ifdef DTYPE8
+typedef u8 DTYPE;
+#define byteswap byteswapu8
+#endif
+
+#ifdef DTYPE64
+typedef u64 DTYPE;
+#define byteswap byteswapu64
+#endif
+
#define R(x,n) ( (x >> n) | (x << (32 - n)))
#define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22))
@@ -23,36 +39,51 @@ typedef unsigned char u8;
d += tmp1; \
}
-static inline DTYPE byteswap(DTYPE x)
+static inline u32 byteswapu32(u32 x)
{
- x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
- x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;
- return x;
+ x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
+ x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;
+ return x;
}
-#define BE_LOAD32(n,b,i) (n) = byteswap(*(DTYPE *)(b + i))
+static inline u16 byteswapu16(u16 x)
+{
+ x = (x & 0x00FF) << 8 | (x & 0xFF00) >> 8;
+ return x;
+}
+
+static inline u8 byteswapu8(u8 x)
+{
+ return x;
+}
+
+static inline u64 byteswapu64(u64 x)
+{
+ x = ((u64)(byteswapu32 (x & 0x00000000FFFFFFFF))) << 32 | byteswapu32((x & 0xFFFFFFFF00000000) >> 32);
+ return x;
+}
-void foo (u8 *in, DTYPE out[8], const DTYPE C[16])
+void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16])
{
DTYPE tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h;
DTYPE w0, w1, w2, w3, w4, w5, w6, w7,
w8, w9, w10, w11, w12, w13, w14, w15;
- w0 = byteswap(*(DTYPE *)(in + 0));
- w1 = byteswap(*(DTYPE *)(in + 4));
- w2 = byteswap(*(DTYPE *)(in + 8));
- w3 = byteswap(*(DTYPE *)(in + 12));
- w4 = byteswap(*(DTYPE *)(in + 16));
- w5 = byteswap(*(DTYPE *)(in + 20));
- w6 = byteswap(*(DTYPE *)(in + 24));
- w7 = byteswap(*(DTYPE *)(in + 28));
- w8 = byteswap(*(DTYPE *)(in + 32));
- w9 = byteswap(*(DTYPE *)(in + 36));
- w10 = byteswap(*(DTYPE *)(in + 40));
- w11 = byteswap(*(DTYPE *)(in + 44));
- w12 = byteswap(*(DTYPE *)(in + 48));
- w13 = byteswap(*(DTYPE *)(in + 52));
- w14 = byteswap(*(DTYPE *)(in + 56));
- w15 = byteswap(*(DTYPE *)(in + 60));
+ w0 = byteswap(in[0]);
+ w1 = byteswap(in[1]);
+ w2 = byteswap(in[2]);
+ w3 = byteswap(in[3]);
+ w4 = byteswap(in[4]);
+ w5 = byteswap(in[5]);
+ w6 = byteswap(in[6]);
+ w7 = byteswap(in[7]);
+ w8 = byteswap(in[8]);
+ w9 = byteswap(in[9]);
+ w10 = byteswap(in[10]);
+ w11 = byteswap(in[11]);
+ w12 = byteswap(in[12]);
+ w13 = byteswap(in[13]);
+ w14 = byteswap(in[14]);
+ w15 = byteswap(in[15]);
a = out[0];
b = out[1];
c = out[2];
@@ -90,3 +121,7 @@ void foo (u8 *in, DTYPE out[8], const DTYPE C[16])
}
/* { dg-final { scan-assembler "kmovd" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-2.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-2.c
index 1f0c6b4..c7d0976 100644
--- a/gcc/testsuite/gcc.target/i386/spill_to_mask-2.c
+++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-2.c
@@ -1,10 +1,9 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u16
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE16" } */
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "kmovw" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-3.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-3.c
index 5b59090..b7a383f 100644
--- a/gcc/testsuite/gcc.target/i386/spill_to_mask-3.c
+++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-3.c
@@ -1,10 +1,9 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u8
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE8" } */
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "kmovb" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
diff --git a/gcc/testsuite/gcc.target/i386/spill_to_mask-4.c b/gcc/testsuite/gcc.target/i386/spill_to_mask-4.c
index f111cf4..d67038d 100644
--- a/gcc/testsuite/gcc.target/i386/spill_to_mask-4.c
+++ b/gcc/testsuite/gcc.target/i386/spill_to_mask-4.c
@@ -1,10 +1,9 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -march=skylake-avx512" } */
-
-#ifndef DTYPE
-#define DTYPE u64
-#endif
+/* { dg-options "-O2 -march=skylake-avx512 -DDTYPE64" } */
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "kmovq" } } */
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */