diff options
-rw-r--r-- | gcc/config/i386/i386.cc | 60 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 92 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c | 12 |
6 files changed, 198 insertions, 4 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 8989985..a9da66d 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -21179,6 +21179,32 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case IOR: + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { + /* (ior (not ...) ...) can be a single insn in AVX512. */ + if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F + && (GET_MODE_SIZE (mode) == 64 + || (TARGET_AVX512VL + && (GET_MODE_SIZE (mode) == 32 + || GET_MODE_SIZE (mode) == 16)))) + { + rtx right = GET_CODE (XEXP (x, 1)) != NOT + ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0); + + *total = ix86_vec_cost (mode, cost->sse_op) + + rtx_cost (XEXP (XEXP (x, 0), 0), mode, + outer_code, opno, speed) + + rtx_cost (right, mode, outer_code, opno, speed); + return true; + } + *total = ix86_vec_cost (mode, cost->sse_op); + } + else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + *total = cost->add * 2; + else + *total = cost->add; + return false; + case XOR: if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) *total = ix86_vec_cost (mode, cost->sse_op); @@ -21199,11 +21225,20 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, /* pandn is a single instruction. */ if (GET_CODE (XEXP (x, 0)) == NOT) { + rtx right = XEXP (x, 1); + + /* (and (not ...) (not ...)) can be a single insn in AVX512. */ + if (GET_CODE (right) == NOT && TARGET_AVX512F + && (GET_MODE_SIZE (mode) == 64 + || (TARGET_AVX512VL + && (GET_MODE_SIZE (mode) == 32 + || GET_MODE_SIZE (mode) == 16)))) + right = XEXP (right, 0); + *total = ix86_vec_cost (mode, cost->sse_op) + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code, opno, speed) - + rtx_cost (XEXP (x, 1), mode, - outer_code, opno, speed); + + rtx_cost (right, mode, outer_code, opno, speed); return true; } else if (GET_CODE (XEXP (x, 1)) == NOT) @@ -21261,8 +21296,25 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case NOT: if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) - // vnot is pxor -1. - *total = ix86_vec_cost (mode, cost->sse_op) + 1; + { + /* (not (xor ...)) can be a single insn in AVX512. */ + if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F + && (GET_MODE_SIZE (mode) == 64 + || (TARGET_AVX512VL + && (GET_MODE_SIZE (mode) == 32 + || GET_MODE_SIZE (mode) == 16)))) + { + *total = ix86_vec_cost (mode, cost->sse_op) + + rtx_cost (XEXP (XEXP (x, 0), 0), mode, + outer_code, opno, speed) + + rtx_cost (XEXP (XEXP (x, 0), 1), mode, + outer_code, opno, speed); + return true; + } + + // vnot is pxor -1. + *total = ix86_vec_cost (mode, cost->sse_op) + 1; + } else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) *total = cost->add * 2; else diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 812cfca..73a8738 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17616,6 +17616,98 @@ operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode)); }) +(define_insn "*iornot<mode>3" + [(set (match_operand:VI 0 "register_operand" "=v,v,v,v") + (ior:VI + (not:VI + (match_operand:VI 1 "bcst_vector_operand" "v,Br,v,m")) + (match_operand:VI 2 "bcst_vector_operand" "vBr,v,m,v")))] + "(<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) + && (register_operand (operands[1], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" +{ + if (!register_operand (operands[1], <MODE>mode)) + { + if (TARGET_AVX512VL) + return "vpternlog<ternlogsuffix>\t{$0xdd, %1, %2, %0|%0, %2, %1, 0xdd}"; + return "vpternlog<ternlogsuffix>\t{$0xdd, %g1, %g2, %g0|%g0, %g2, %g1, 0xdd}"; + } + if (TARGET_AVX512VL) + return "vpternlog<ternlogsuffix>\t{$0xbb, %2, %1, %0|%0, %1, %2, 0xbb}"; + return "vpternlog<ternlogsuffix>\t{$0xbb, %g2, %g1, %g0|%g0, %g1, %g2, 0xbb}"; +} + [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512VL") + (const_string "<sseinsnmode>") + (const_string "XI"))) + (set (attr "enabled") + (if_then_else (eq_attr "alternative" "2,3") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") + (const_string "*")))]) + +(define_insn "*xnor<mode>3" + [(set (match_operand:VI 0 "register_operand" "=v,v") + (not:VI + (xor:VI + (match_operand:VI 1 "bcst_vector_operand" "%v,v") + (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))] + "(<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) + && (register_operand (operands[1], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" +{ + if (TARGET_AVX512VL) + return "vpternlog<ternlogsuffix>\t{$0x99, %2, %1, %0|%0, %1, %2, 0x99}"; + else + return "vpternlog<ternlogsuffix>\t{$0x99, %g2, %g1, %g0|%g0, %g1, %g2, 0x99}"; +} + [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512VL") + (const_string "<sseinsnmode>") + (const_string "XI"))) + (set (attr "enabled") + (if_then_else (eq_attr "alternative" "1") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") + (const_string "*")))]) + +(define_code_iterator andor [and ior]) +(define_code_attr nlogic [(and "nor") (ior "nand")]) +(define_code_attr ternlog_nlogic [(and "0x11") (ior "0x77")]) + +(define_insn "*<nlogic><mode>3" + [(set (match_operand:VI 0 "register_operand" "=v,v") + (andor:VI + (not:VI (match_operand:VI 1 "bcst_vector_operand" "%v,v")) + (not:VI (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))] + "(<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) + && (register_operand (operands[1], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" +{ + if (TARGET_AVX512VL) + return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %2, %1, %0|%0, %1, %2, <ternlog_nlogic>}"; + else + return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %g2, %g1, %g0|%g0, %g1, %g2, <ternlog_nlogic>}"; +} + [(set_attr "type" "sselog") + (set_attr "length_immediate" "1") + (set_attr "prefix" "evex") + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512VL") + (const_string "<sseinsnmode>") + (const_string "XI"))) + (set (attr "enabled") + (if_then_else (eq_attr "alternative" "1") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") + (const_string "*")))]) + (define_mode_iterator AVX512ZEXTMASK [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI]) diff --git a/gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h b/gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h new file mode 100644 index 0000000..c68f2a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, SCALAR *f) +{ + return OP (vec, op, suffix) (x, OP (vec, xor, suffix) (DUP (vec, suffix, *f), + DUP (vec, suffix, ~0))); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h b/gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h new file mode 100644 index 0000000..9f0900750 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h @@ -0,0 +1,13 @@ +#include <immintrin.h> + +#define PASTER2(x,y) x##y +#define PASTER3(x,y,z) _mm##x##_##y##_##z +#define OP(vec, op, suffix) PASTER3 (vec, op, suffix) +#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val) + +type +foo (type x, SCALAR *f) +{ + return OP (vec, op, suffix) (OP (vec, xor, suffix) (x, DUP (vec, suffix, ~0)), + DUP (vec, suffix, *f)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c new file mode 100644 index 0000000..7d02f03 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xdd, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ + +#define type __m512i +#define vec 512 +#define op or +#define suffix epi32 +#define SCALAR int + +#include "avx512-binop-not-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c new file mode 100644 index 0000000..c793083 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xbb, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ + +#define type __m512i +#define vec 512 +#define op or +#define suffix epi32 +#define SCALAR int + +#include "avx512-binop-not-2.h" |