diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 103 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 14 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 120 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/shift-gf2p8affine-1.c | 64 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/shift-gf2p8affine-2.c | 196 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/shift-gf2p8affine-3.c | 85 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/shift-gf2p8affine-5.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/shift-gf2p8affine-6.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/shift-gf2p8affine-7.c | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/shift-v16qi-4.c | 11 |
11 files changed, 595 insertions, 14 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 12cec61..ef6c12c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -27034,6 +27034,109 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx, return target; } +/* GF2P8AFFINEQB matrices to implement shift and rotate. */ + +static const uint64_t matrix_ashift[8] = +{ + 0, + 0x0001020408102040, /* 1 l */ + 0x0000010204081020, /* 2 l */ + 0x0000000102040810, /* 3 l */ + 0x0000000001020408, /* 4 l */ + 0x0000000000010204, /* 5 l */ + 0x0000000000000102, /* 6 l */ + 0x0000000000000001 /* 7 l */ +}; + +static const uint64_t matrix_lshiftrt[8] = +{ + 0, + 0x0204081020408000, /* 1 r */ + 0x0408102040800000, /* 2 r */ + 0x0810204080000000, /* 3 r */ + 0x1020408000000000, /* 4 r */ + 0x2040800000000000, /* 5 r */ + 0x4080000000000000, /* 6 r */ + 0x8000000000000000 /* 7 r */ +}; + +static const uint64_t matrix_ashiftrt[8] = +{ + 0, + 0x0204081020408080, /* 1 r */ + 0x0408102040808080, /* 2 r */ + 0x0810204080808080, /* 3 r */ + 0x1020408080808080, /* 4 r */ + 0x2040808080808080, /* 5 r */ + 0x4080808080808080, /* 6 r */ + 0x8080808080808080 /* 7 r */ +}; + +static const uint64_t matrix_rotate[8] = +{ + 0, + 0x8001020408102040, /* 1 rol8 */ + 0x4080010204081020, /* 2 rol8 */ + 0x2040800102040810, /* 3 rol8 */ + 0x1020408001020408, /* 4 rol8 */ + 0x0810204080010204, /* 5 rol8 */ + 0x0408102040800102, /* 6 rol8 */ + 0x0204081020408001 /* 7 rol8 */ +}; + +static const uint64_t matrix_rotatert[8] = +{ + 0, + 0x0204081020408001, /* 1 ror8 */ + 0x0408102040800102, /* 2 ror8 */ + 0x0810204080010204, /* 3 ror8 */ + 0x1020408001020408, /* 4 ror8 */ + 0x2040800102040810, /* 5 ror8 */ + 0x4080010204081020, /* 6 ror8 */ + 0x8001020408102040 /* 7 ror8 */ +}; + +/* Return rtx to load a 64-bit GF2P8AFFINE GF(2) matrix implementing a shift + or rotate for CODE and shift count COUNT into a register with the vector mode of SRC.
*/ + +rtx +ix86_vgf2p8affine_shift_matrix (rtx src, rtx count, enum rtx_code code) +{ + machine_mode mode = GET_MODE (src); + const uint64_t *matrix; + unsigned shift = INTVAL (count) & 7; + gcc_assert (shift > 0 && shift < 8); + + switch (code) + { + case ASHIFT: + matrix = matrix_ashift; + break; + case ASHIFTRT: + matrix = matrix_ashiftrt; + break; + case LSHIFTRT: + matrix = matrix_lshiftrt; + break; + case ROTATE: + matrix = matrix_rotate; + break; + case ROTATERT: + matrix = matrix_rotatert; + break; + default: + gcc_unreachable (); + } + + int nelts = GET_MODE_NUNITS (mode); + rtvec vec = rtvec_alloc (nelts); + uint64_t ma = matrix[shift]; + for (int i = 0; i < nelts; i++) + RTVEC_ELT (vec, i) = gen_int_mode ((ma >> ((i % 8) * 8)) & 0xff, QImode); + + return force_reg (mode, gen_rtx_CONST_VECTOR (mode, vec)); +} + /* Trunc a vector to a narrow vector, like v4di -> v4si. */ void diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index ee6b78b..bdb8bb9 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -448,3 +448,4 @@ extern void ix86_set_handled_components (sbitmap); /* In i386-expand.cc. */ bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*, HOST_WIDE_INT*); +rtx ix86_vgf2p8affine_shift_matrix (rtx, rtx, enum rtx_code); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 55c9b16..b4b84b9 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22102,6 +22102,15 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, } /* FALLTHRU */ case V32QImode: + if (TARGET_GFNI && constant_op1) + { + /* Use vgf2p8affine. One extra load for the mask, but in a loop + with enough registers it will be moved out. So for now don't + account the constant mask load. This is not quite right + for non loop vectorization. */ + extra = 0; + return ix86_vec_cost (mode, cost->sse_op) + extra; + } if (TARGET_AVX2) /* Use vpbroadcast. 
*/ extra = cost->sse_op; @@ -22136,6 +22145,11 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, count = 9; return ix86_vec_cost (mode, cost->sse_op * count) + extra; + case V64QImode: + /* Ignore the mask load for GF2P8AFFINEQB. */ + extra = 0; + return ix86_vec_cost (mode, cost->sse_op) + extra; + case V2DImode: case V4DImode: /* V*DImode arithmetic right shift is emulated. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ec74f93..951ee54 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -326,6 +326,9 @@ (define_mode_iterator VI1_AVX512VL [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) +(define_mode_iterator VI1_AVX512_3264 + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX")]) + ;; All vector modes (define_mode_iterator V [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI @@ -26559,9 +26562,9 @@ ;; XOP packed rotate instructions (define_expand "rotl<mode>3" - [(set (match_operand:VI_128 0 "register_operand") - (rotate:VI_128 - (match_operand:VI_128 1 "nonimmediate_operand") + [(set (match_operand:VI248_128 0 "register_operand") + (rotate:VI248_128 + (match_operand:VI248_128 1 "nonimmediate_operand") (match_operand:SI 2 "general_operand")))] "TARGET_XOP" { @@ -26590,9 +26593,9 @@ }) (define_expand "rotr<mode>3" - [(set (match_operand:VI_128 0 "register_operand") - (rotatert:VI_128 - (match_operand:VI_128 1 "nonimmediate_operand") + [(set (match_operand:VI248_128 0 "register_operand") + (rotatert:VI248_128 + (match_operand:VI248_128 1 "nonimmediate_operand") (match_operand:SI 2 "general_operand")))] "TARGET_XOP" { @@ -26964,31 +26967,120 @@ int i; if (<CODE> != ASHIFT) - { - if (CONST_INT_P (operands[2])) - operands[2] = GEN_INT (-INTVAL (operands[2])); - else - negate = true; - } + { + if (CONST_INT_P (operands[2])) + operands[2] = GEN_INT (-INTVAL (operands[2])); + else + negate = true; + } par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); tmp = lowpart_subreg (QImode, operands[2], SImode); for 
(i = 0; i < 16; i++) - XVECEXP (par, 0, i) = tmp; + XVECEXP (par, 0, i) = tmp; tmp = gen_reg_rtx (V16QImode); emit_insn (gen_vec_initv16qiqi (tmp, par)); if (negate) - emit_insn (gen_negv16qi2 (tmp, tmp)); + emit_insn (gen_negv16qi2 (tmp, tmp)); gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3); emit_insn (gen (operands[0], operands[1], tmp)); } + else if (TARGET_GFNI && CONST_INT_P (operands[2]) + && (<MODE_SIZE> == 64 + || !(INTVAL (operands[2]) == 7 && <CODE> == ASHIFTRT))) + { + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], + <CODE>); + emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix, + GEN_INT (0))); + } else ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]); DONE; }) +; not generated by vectorizer? +(define_expand "cond_<insn><mode>" + [(set (match_operand:VI1_AVX512VL 0 "register_operand") + (vec_merge:VI1_AVX512VL + (any_shift:VI1_AVX512VL + (match_operand:VI1_AVX512VL 2 "register_operand") + (match_operand:VI1_AVX512VL 3 "nonimmediate_or_const_vec_dup_operand")) + (match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand") + (match_operand:<avx512fmaskmode> 1 "register_operand")))] + "TARGET_GFNI && TARGET_AVX512F" +{ + /* Operand 1 is the mask, operand 2 the vector being shifted and operand 3 + the shift count broadcast to every element. Only a constant count can + be turned into a GF2P8AFFINEQB matrix. */ + rtx count = unwrap_const_vec_duplicate (operands[3]); + if (!CONST_INT_P (count)) + FAIL; + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], count, <CODE>); + emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[2], matrix, + GEN_INT (0), operands[4], operands[1])); + DONE; +}) + +(define_expand "<insn><mode>3" + [(set (match_operand:VI1_AVX512_3264 0 "register_operand") + (any_rotate:VI1_AVX512_3264 + (match_operand:VI1_AVX512_3264 1 "general_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_GFNI" +{ + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); + emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix, + GEN_INT (0))); + DONE; +}) + +(define_expand "<insn>v16qi3" + [(set (match_operand:V16QI 0 "register_operand") + (any_rotate:V16QI + (match_operand:V16QI 1
"nonimmediate_operand") + (match_operand:SI 2 "general_operand")))] + "TARGET_GFNI || TARGET_XOP" +{ + /* Handle the V16QI XOP case to avoid a conflict with the other expand. */ + if (TARGET_XOP) + { + if (! const_0_to_7_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != QImode) + { + op2 = gen_reg_rtx (QImode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_initv16qiqi (reg, par)); + if (<CODE> == ROTATERT) + { + rtx neg = gen_reg_rtx (V16QImode); + emit_insn (gen_negv16qi2 (neg, reg)); + /* A right rotate is a left rotate by the negated count. */ + reg = neg; + } + emit_insn (gen_xop_vrotlv16qi3 (operands[0], operands[1], reg)); + DONE; + } + } + else if (TARGET_GFNI && CONST_INT_P (operands[2])) + { + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>); + emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], operands[1], matrix, + GEN_INT (0))); + DONE; + } + else + FAIL; +}) + (define_expand "ashrv2di3" [(set (match_operand:V2DI 0 "register_operand") (ashiftrt:V2DI diff --git a/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-1.c b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-1.c new file mode 100644 index 0000000..e5be3a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-1.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "-mgfni -mavx512vl -mavx512bw -mavx512f -O3" } */ +/* { dg-final { scan-assembler-times "vgf2p8affineqb" 14 } } */ + +#ifndef N +#define N 5 +#endif + +void +ubyteshiftl (unsigned char *a, int len) +{ + int i; + for (i = 0; i < len; i++) + a[i] <<= N; +} + +void +ubyteshiftr (unsigned char *a, int len) +{ + int i; + for (i = 0; i < len; i++) + a[i] >>= N; +} + +void +ubyteshiftl_mask (unsigned char *a, int len) +{ + int i; + for (i = 0; i < len; i++) +
if (a[i] & 1) + a[i] <<= N; +} + +void +sbyteshiftl (signed char *a, int len) +{ + int i; + for (i = 0; i < len; i++) + a[i] <<= N; +} + +void +sbyteshiftr (signed char *a, int len) +{ + int i; + for (i = 0; i < len; i++) + a[i] >>= N; +} + +void +ubyteror (unsigned char *a, int len) +{ + int i; + for (i = 0; i < len; i++) + a[i] = a[i] << N | a[i] >> (8 - N); +} + +void +ubyterol (unsigned char *a, int len) +{ + int i; + for (i = 0; i < len; i++) + a[i] = a[i] >> N | a[i] << (8 - N); +} diff --git a/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-2.c b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-2.c new file mode 100644 index 0000000..098361a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-2.c @@ -0,0 +1,196 @@ +/* { dg-do run } */ +/* { dg-options "-mgfni -mavx512vl -mavx512bw -mavx512f -O3 -Wno-shift-count-negative" } */ + +#include <string.h> + +#ifndef N1 +#define N1 5 +#endif + +#ifndef N2 +#define N2 3 +#endif + +#ifndef N3 +#define N3 1 +#endif + +#ifndef N4 +#define N4 7 +#endif + +#ifndef N5 +#define N5 -3 +#endif + +#ifndef FILLER +#define FILLER 0xab +#endif + +#define FUNC(N) \ + void ubyteshiftl##N(unsigned char *a, int len) \ + { \ + int i; \ + for (i = 0; i < len; i++) \ + a[i] <<= N; \ + } \ + \ + void ubyteshiftr##N(unsigned char *a, int len) \ + { \ + int i; \ + for (i = 0; i < len; i++) \ + a[i] >>= N; \ + } \ + \ + void ubyteshiftl_mask##N(unsigned char *a, int len) \ + { \ + int i; \ + for (i = 0; i < len; i++) \ + if (a[i] & 1) \ + a[i] <<= N; \ + } \ + \ + void sbyteshiftl##N(signed char *a, int len) \ + { \ + int i; \ + for (i = 0; i < len; i++) \ + a[i] <<= N; \ + } \ + \ + void sbyteshiftr##N(signed char *a, int len) \ + { \ + int i; \ + for (i = 0; i < len; i++) \ + a[i] >>= N; \ + } \ + \ + void ubyteror##N(unsigned char *a, int len) \ + { \ + int i; \ + for (i = 0; i < len; i++) \ + a[i] = a[i] << N | a[i] >> (8-N); \ + } \ + \ + void ubyterol##N(unsigned char *a, int len) \ + { \ + int i; \ + for (i = 0; i < 
len; i++) \ + a[i] = a[i] >> N | a[i] << (8-N); \ + } \ + void ubyteshiftl##N##ref(unsigned char *a, int len) \ + { \ + int i; \ + _Pragma("GCC novector") \ + for (i = 0; i < len; i++) \ + a[i] <<= N; \ + } \ + \ + void ubyteshiftr##N##ref(unsigned char *a, int len) \ + { \ + int i; \ + _Pragma("GCC novector") \ + for (i = 0; i < len; i++) \ + a[i] >>= N; \ + } \ + \ + void ubyteshiftl_mask##N##ref(unsigned char *a, int len) \ + { \ + int i; \ + _Pragma("GCC novector") \ + for (i = 0; i < len; i++) \ + if (a[i] & 1) \ + a[i] <<= N; \ + } \ + \ + void sbyteshiftl##N##ref(signed char *a, int len) \ + { \ + int i; \ + _Pragma("GCC novector") \ + for (i = 0; i < len; i++) \ + a[i] <<= N; \ + } \ + \ + void sbyteshiftr##N##ref(signed char *a, int len) \ + { \ + int i; \ + _Pragma("GCC novector") \ + for (i = 0; i < len; i++) \ + a[i] >>= N; \ + } \ + \ + void ubyteror##N##ref(unsigned char *a, int len) \ + { \ + int i; \ + _Pragma("GCC novector") \ + for (i = 0; i < len; i++) \ + a[i] = a[i] << N | a[i] >> (8-N); \ + } \ + \ + void ubyterol##N##ref(unsigned char *a, int len) \ + { \ + int i; \ + _Pragma("GCC novector") \ + for (i = 0; i < len; i++) \ + a[i] = a[i] >> N | a[i] << (8-N); \ + } + +FUNC (N1) +FUNC (N2) +FUNC (N3) +FUNC (N4) +FUNC (N5) + +#define TEST(N, func) \ + memset (array, filler, len); \ + func##N (array, len); \ + memset (array2, filler, len); \ + func##N##ref (array2, len); \ + if (memcmp (array, array2, len)) __builtin_abort () + +int main () +{ + __builtin_cpu_init (); + if (!__builtin_cpu_supports ("gfni")) + return 0; + + const unsigned long len = 256; + char array[len], array2[len]; + unsigned char filler = FILLER; + + TEST (N1, ubyteshiftl); + TEST (N1, ubyteshiftl_mask); + TEST (N1, sbyteshiftl); + TEST (N1, sbyteshiftr); + TEST (N1, ubyteror); + TEST (N1, ubyterol); + + TEST (N2, ubyteshiftl); + TEST (N2, ubyteshiftl_mask); + TEST (N2, sbyteshiftl); + TEST (N2, sbyteshiftr); + TEST (N2, ubyteror); + TEST (N2, ubyterol); + + TEST (N3, 
ubyteshiftl); + TEST (N3, ubyteshiftl_mask); + TEST (N3, sbyteshiftl); + TEST (N3, sbyteshiftr); + TEST (N3, ubyteror); + TEST (N3, ubyterol); + + TEST (N4, ubyteshiftl); + TEST (N4, ubyteshiftl_mask); + TEST (N4, sbyteshiftl); + TEST (N4, sbyteshiftr); + TEST (N4, ubyteror); + TEST (N4, ubyterol); + + TEST (N5, ubyteshiftl); + TEST (N5, ubyteshiftl_mask); + TEST (N5, sbyteshiftl); + TEST (N5, sbyteshiftr); + TEST (N5, ubyteror); + TEST (N5, ubyterol); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-3.c b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-3.c new file mode 100644 index 0000000..9e5ae5d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-3.c @@ -0,0 +1,85 @@ +/* { dg-do compile } */ +/* { dg-options "-mgfni -mavx512bw -mavx512f -O3" } */ +/* { dg-final { scan-assembler-times "vgf2p8affineqb" 12 } } */ + +/* Based on a test case from Andrew Pinski */ + +#ifndef N +#define N 5 +#endif + +void +ubyteshiftl (unsigned char *restrict a, unsigned char *restrict b, unsigned char *restrict c, int len) +{ + int i; + for (i = 0; i < len; i++) + { + a[i] = c[i] ? (a[i] | b[i]) << N : a[i]; + a[i] = (!c[i]) ? (a[i] ^ b[i]) << N : a[i]; + } +} + +void +ubyteshiftr (unsigned char *restrict a, unsigned char *restrict b, unsigned char *restrict c, int len) +{ + int i; + for (i = 0; i < len; i++) + { + a[i] = c[i] ? (a[i] | b[i]) >> N : a[i]; + a[i] = (!c[i]) ? (a[i] ^ b[i]) >> N : a[i]; + } +} + +void +sbyteshiftl (signed char *restrict a, signed char *restrict b, signed char *restrict c, int len) +{ + int i; + for (i = 0; i < len; i++) + { + a[i] = c[i] ? (a[i] | b[i]) << N : a[i]; + a[i] = (!c[i]) ? (a[i] ^ b[i]) << N : a[i]; + } +} + +void +sbyteshiftr (signed char *restrict a, signed char *restrict b, signed char *restrict c, int len) +{ + int i; + for (i = 0; i < len; i++) + { + a[i] = c[i] ? (a[i] | b[i]) >> N : a[i]; + a[i] = (!c[i]) ? 
(a[i] ^ b[i]) >> N : a[i]; + } +} + +static inline unsigned char rol8(unsigned char v, int c) +{ + return (v >> c) | (v << (8-c)); +} + +static inline unsigned char ror8(unsigned char v, int c) +{ + return (v << c) | (v >> (8-c)); +} + +void +ubyterol (unsigned char *restrict a, unsigned char *restrict b, unsigned char *restrict c, int len) +{ + int i; + for (i = 0; i < len; i++) + { + a[i] = c[i] ? rol8(a[i] | b[i], N) : a[i]; + a[i] = (!c[i]) ? rol8(a[i] ^ b[i], N) : a[i]; + } +} + +void +ubyteror (unsigned char *restrict a, unsigned char *restrict b, unsigned char *restrict c, int len) +{ + int i; + for (i = 0; i < len; i++) + { + a[i] = c[i] ? ror8(a[i] | b[i], N) : a[i]; + a[i] = (!c[i]) ? ror8(a[i] ^ b[i], N) : a[i]; + } +} diff --git a/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-5.c b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-5.c new file mode 100644 index 0000000..65fb692 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-5.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-mgfni -mavx -O3 -Wno-shift-count-negative" } */ +/* { dg-final { scan-assembler-times "vgf2p8affineqb" 31 } } */ + +#include "shift-gf2p8affine-2.c" diff --git a/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-6.c b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-6.c new file mode 100644 index 0000000..3391deb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-6.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-mgfni -O3 -Wno-shift-count-negative" } */ +/* { dg-final { scan-assembler-times "vgf2p8affineqb" 0 } } */ + +#include "shift-gf2p8affine-2.c" diff --git a/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-7.c b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-7.c new file mode 100644 index 0000000..37ba0c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shift-gf2p8affine-7.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-mgfni -mavx512vl -mavx512bw -mavx512f -O3 -Wno-shift-count-negative" } */ +/* { 
dg-final { scan-assembler-times "vgf2p8affineqb" 53 } } */ + +#include "shift-gf2p8affine-2.c" diff --git a/gcc/testsuite/gcc.target/i386/shift-v16qi-4.c b/gcc/testsuite/gcc.target/i386/shift-v16qi-4.c new file mode 100644 index 0000000..edc2b21 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shift-v16qi-4.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-mgfni -mavx512vl -mavx512bw -mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vpcmpgtb" 1 } } */ + +typedef char v16qi __attribute__((vector_size(16))); + +v16qi +foo (v16qi a) +{ + return a >> 7; +} |