diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2024-05-07 07:14:40 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2024-05-07 07:16:58 +0100 |
commit | 79649a5dcd81bc05c0ba591068c9075de43bd417 (patch) | |
tree | 0c833b1c89f8afc6eaf58f0f318afcc9306f0ffe /gcc | |
parent | 0c43c673b0d431ca02d83bf6fae9cd60e9a3d0a8 (diff) | |
download | gcc-79649a5dcd81bc05c0ba591068c9075de43bd417.zip gcc-79649a5dcd81bc05c0ba591068c9075de43bd417.tar.gz gcc-79649a5dcd81bc05c0ba591068c9075de43bd417.tar.bz2 |
PR target/106060: Improved SSE vector constant materialization on x86.
This patch resolves PR target/106060 by providing efficient methods for
materializing/synthesizing special "vector" constants on x86. Currently
there are three methods of materializing a vector constant; the most
general is to load a vector from the constant pool, secondly "duplicated"
constants can be synthesized by moving an integer between units and
broadcasting (of shuffling it), and finally the special cases of the
all-zeros vector and all-ones vectors can be loaded via a single SSE
instruction. This patch handle additional cases that can be synthesized
in two instructions, loading an all-ones vector followed by another SSE
instruction. Following my recent patch for PR target/112992, there's
conveniently a single place in i386-expand.cc where these special cases
can be handled.
Two examples are given in the original bugzilla PR for 106060.
__m256i should_be_cmpeq_abs ()
{
return _mm256_set1_epi8 (1);
}
is now generated (with -O3 -march=x86-64-v3) as:
vpcmpeqd %ymm0, %ymm0, %ymm0
vpabsb %ymm0, %ymm0
ret
and
__m256i should_be_cmpeq_add ()
{
return _mm256_set1_epi8 (-2);
}
is now generated as:
vpcmpeqd %ymm0, %ymm0, %ymm0
vpaddb %ymm0, %ymm0, %ymm0
ret
2024-05-07 Roger Sayle <roger@nextmovesoftware.com>
Hongtao Liu <hongtao.liu@intel.com>
gcc/ChangeLog
PR target/106060
* config/i386/i386-expand.cc (enum ix86_vec_bcast_alg): New.
(struct ix86_vec_bcast_map_simode_t): New type for table below.
(ix86_vec_bcast_map_simode): Table of SImode constants that may
be efficiently synthesized by a ix86_vec_bcast_alg method.
(ix86_vec_bcast_map_simode_cmp): New comparator for bsearch.
(ix86_vector_duplicate_simode_const): Efficiently synthesize
V4SImode and V8SImode constants that duplicate special constants.
(ix86_vector_duplicate_value): Attempt to synthesize "special"
vector constants using ix86_vector_duplicate_simode_const.
* config/i386/i386.cc (ix86_rtx_costs) <case ABS>: ABS of a
vector integer mode costs with a single SSE instruction.
gcc/testsuite/ChangeLog
PR target/106060
* gcc.target/i386/auto-init-8.c: Update test case.
* gcc.target/i386/avx512fp16-13.c: Likewise.
* gcc.target/i386/pr100865-9a.c: Likewise.
* gcc.target/i386/pr101796-1.c: Likewise.
* gcc.target/i386/pr106060-1.c: New test case.
* gcc.target/i386/pr106060-2.c: Likewise.
* gcc.target/i386/pr106060-3.c: Likewise.
* gcc.target/i386/pr70314.c: Update test case.
* gcc.target/i386/vect-shiftv4qi.c: Likewise.
* gcc.target/i386/vect-shiftv8qi.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 364 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/auto-init-8.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512fp16-13.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr100865-9a.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr101796-1.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr106060-1.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr106060-2.c | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr106060-3.c | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr70314.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c | 2 |
12 files changed, 411 insertions, 13 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 8bb8f21..a613291 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -15696,6 +15696,332 @@ s4fma_expand: gcc_unreachable (); } +/* See below where shifts are handled for explanation of this enum. */ +enum ix86_vec_bcast_alg +{ + VEC_BCAST_PXOR, + VEC_BCAST_PCMPEQ, + VEC_BCAST_PABSB, + VEC_BCAST_PADDB, + VEC_BCAST_PSRLW, + VEC_BCAST_PSRLD, + VEC_BCAST_PSLLW, + VEC_BCAST_PSLLD +}; + +struct ix86_vec_bcast_map_simode_t +{ + unsigned int key; + enum ix86_vec_bcast_alg alg; + unsigned int arg; +}; + +/* This table must be kept sorted as values are looked-up using bsearch. */ +static const ix86_vec_bcast_map_simode_t ix86_vec_bcast_map_simode[] = { + { 0x00000000, VEC_BCAST_PXOR, 0 }, + { 0x00000001, VEC_BCAST_PSRLD, 31 }, + { 0x00000003, VEC_BCAST_PSRLD, 30 }, + { 0x00000007, VEC_BCAST_PSRLD, 29 }, + { 0x0000000f, VEC_BCAST_PSRLD, 28 }, + { 0x0000001f, VEC_BCAST_PSRLD, 27 }, + { 0x0000003f, VEC_BCAST_PSRLD, 26 }, + { 0x0000007f, VEC_BCAST_PSRLD, 25 }, + { 0x000000ff, VEC_BCAST_PSRLD, 24 }, + { 0x000001ff, VEC_BCAST_PSRLD, 23 }, + { 0x000003ff, VEC_BCAST_PSRLD, 22 }, + { 0x000007ff, VEC_BCAST_PSRLD, 21 }, + { 0x00000fff, VEC_BCAST_PSRLD, 20 }, + { 0x00001fff, VEC_BCAST_PSRLD, 19 }, + { 0x00003fff, VEC_BCAST_PSRLD, 18 }, + { 0x00007fff, VEC_BCAST_PSRLD, 17 }, + { 0x0000ffff, VEC_BCAST_PSRLD, 16 }, + { 0x00010001, VEC_BCAST_PSRLW, 15 }, + { 0x0001ffff, VEC_BCAST_PSRLD, 15 }, + { 0x00030003, VEC_BCAST_PSRLW, 14 }, + { 0x0003ffff, VEC_BCAST_PSRLD, 14 }, + { 0x00070007, VEC_BCAST_PSRLW, 13 }, + { 0x0007ffff, VEC_BCAST_PSRLD, 13 }, + { 0x000f000f, VEC_BCAST_PSRLW, 12 }, + { 0x000fffff, VEC_BCAST_PSRLD, 12 }, + { 0x001f001f, VEC_BCAST_PSRLW, 11 }, + { 0x001fffff, VEC_BCAST_PSRLD, 11 }, + { 0x003f003f, VEC_BCAST_PSRLW, 10 }, + { 0x003fffff, VEC_BCAST_PSRLD, 10 }, + { 0x007f007f, VEC_BCAST_PSRLW, 9 }, + { 0x007fffff, VEC_BCAST_PSRLD, 9 }, + { 0x00ff00ff, VEC_BCAST_PSRLW, 8 }, + { 0x00ffffff, VEC_BCAST_PSRLD, 8 }, + { 0x01010101, VEC_BCAST_PABSB, 0 }, + { 0x01ff01ff, VEC_BCAST_PSRLW, 7 }, + { 0x01ffffff, VEC_BCAST_PSRLD, 7 }, + { 0x03ff03ff, VEC_BCAST_PSRLW, 6 }, + { 0x03ffffff, VEC_BCAST_PSRLD, 6 }, + { 0x07ff07ff, VEC_BCAST_PSRLW, 5 }, + { 0x07ffffff, VEC_BCAST_PSRLD, 5 }, + { 0x0fff0fff, VEC_BCAST_PSRLW, 4 }, + { 0x0fffffff, VEC_BCAST_PSRLD, 4 }, + { 0x1fff1fff, VEC_BCAST_PSRLW, 3 }, + { 0x1fffffff, VEC_BCAST_PSRLD, 3 }, + { 0x3fff3fff, VEC_BCAST_PSRLW, 2 }, + { 0x3fffffff, VEC_BCAST_PSRLD, 2 }, + { 0x7fff7fff, VEC_BCAST_PSRLW, 1 }, + { 0x7fffffff, VEC_BCAST_PSRLD, 1 }, + { 0x80000000, VEC_BCAST_PSLLD, 31 }, + { 0x80008000, VEC_BCAST_PSLLW, 15 }, + { 0xc0000000, VEC_BCAST_PSLLD, 30 }, + { 0xc000c000, VEC_BCAST_PSLLW, 14 }, + { 0xe0000000, VEC_BCAST_PSLLD, 29 }, + { 0xe000e000, VEC_BCAST_PSLLW, 13 }, + { 0xf0000000, VEC_BCAST_PSLLD, 28 }, + { 0xf000f000, VEC_BCAST_PSLLW, 12 }, + { 0xf8000000, VEC_BCAST_PSLLD, 27 }, + { 0xf800f800, VEC_BCAST_PSLLW, 11 }, + { 0xfc000000, VEC_BCAST_PSLLD, 26 }, + { 0xfc00fc00, VEC_BCAST_PSLLW, 10 }, + { 0xfe000000, VEC_BCAST_PSLLD, 25 }, + { 0xfe00fe00, VEC_BCAST_PSLLW, 9 }, + { 0xfefefefe, VEC_BCAST_PADDB, 0 }, + { 0xff000000, VEC_BCAST_PSLLD, 24 }, + { 0xff00ff00, VEC_BCAST_PSLLW, 8 }, + { 0xff800000, VEC_BCAST_PSLLD, 23 }, + { 0xff80ff80, VEC_BCAST_PSLLW, 7 }, + { 0xffc00000, VEC_BCAST_PSLLD, 22 }, + { 0xffc0ffc0, VEC_BCAST_PSLLW, 6 }, + { 0xffe00000, VEC_BCAST_PSLLD, 21 }, + { 0xffe0ffe0, VEC_BCAST_PSLLW, 5 }, + { 0xfff00000, VEC_BCAST_PSLLD, 20 }, + { 0xfff0fff0, VEC_BCAST_PSLLW, 4 }, + { 0xfff80000, VEC_BCAST_PSLLD, 19 }, + { 0xfff8fff8, VEC_BCAST_PSLLW, 3 }, + { 0xfffc0000, VEC_BCAST_PSLLD, 18 }, + { 0xfffcfffc, VEC_BCAST_PSLLW, 2 }, + { 0xfffe0000, VEC_BCAST_PSLLD, 17 }, + { 0xfffefffe, VEC_BCAST_PSLLW, 1 }, + { 0xffff0000, VEC_BCAST_PSLLD, 16 }, + { 0xffff8000, VEC_BCAST_PSLLD, 15 }, + { 0xffffc000, VEC_BCAST_PSLLD, 14 }, + { 0xffffe000, VEC_BCAST_PSLLD, 13 }, + { 0xfffff000, VEC_BCAST_PSLLD, 12 }, + { 0xfffff800, VEC_BCAST_PSLLD, 11 }, + { 0xfffffc00, VEC_BCAST_PSLLD, 10 }, + { 0xfffffe00, VEC_BCAST_PSLLD, 9 }, + { 0xffffff00, VEC_BCAST_PSLLD, 8 }, + { 0xffffff80, VEC_BCAST_PSLLD, 7 }, + { 0xffffffc0, VEC_BCAST_PSLLD, 6 }, + { 0xffffffe0, VEC_BCAST_PSLLD, 5 }, + { 0xfffffff0, VEC_BCAST_PSLLD, 4 }, + { 0xfffffff8, VEC_BCAST_PSLLD, 3 }, + { 0xfffffffc, VEC_BCAST_PSLLD, 2 }, + { 0xfffffffe, VEC_BCAST_PSLLD, 1 }, + { 0xffffffff, VEC_BCAST_PCMPEQ, 0 } +}; + +/* Comparator for bsearch on ix86_vec_bcast_map. */ +static int +ix86_vec_bcast_map_simode_cmp (const void *key, const void *entry) +{ + return (*(const unsigned int*)key) + - ((const ix86_vec_bcast_map_simode_t*)entry)->key; +} + +/* A subroutine of ix86_vector_duplicate_value. Tries to efficiently + materialize V4SImode, V8SImode and V16SImode vectors from SImode + integer constants. */ +static bool +ix86_vector_duplicate_simode_const (machine_mode mode, rtx target, + unsigned int val) +{ + const ix86_vec_bcast_map_simode_t *entry; + rtx tmp1, tmp2; + + entry = (const ix86_vec_bcast_map_simode_t*) + bsearch(&val, ix86_vec_bcast_map_simode, + ARRAY_SIZE (ix86_vec_bcast_map_simode), + sizeof (ix86_vec_bcast_map_simode_t), + ix86_vec_bcast_map_simode_cmp); + if (!entry) + return false; + + switch (entry->alg) + { + case VEC_BCAST_PXOR: + if ((mode == V8SImode && !TARGET_AVX2) + || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512))) + return false; + emit_move_insn (target, CONST0_RTX (mode)); + return true; + + case VEC_BCAST_PCMPEQ: + if ((mode == V4SImode && !TARGET_SSE2) + || (mode == V8SImode && !TARGET_AVX2) + || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512))) + return false; + emit_move_insn (target, CONSTM1_RTX (mode)); + return true; + + case VEC_BCAST_PABSB: + if (mode == V4SImode && TARGET_SSE2) + { + tmp1 = gen_reg_rtx (V16QImode); + emit_move_insn (tmp1, CONSTM1_RTX (V16QImode)); + tmp2 = gen_reg_rtx (V16QImode); + emit_insn (gen_absv16qi2 (tmp2, tmp1)); + } + else if (mode == V8SImode && TARGET_AVX2) + { + tmp1 = gen_reg_rtx (V32QImode); + emit_move_insn (tmp1, CONSTM1_RTX (V32QImode)); + tmp2 = gen_reg_rtx (V32QImode); + emit_insn (gen_absv32qi2 (tmp2, tmp1)); + } + else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + { + tmp1 = gen_reg_rtx (V64QImode); + emit_move_insn (tmp1, CONSTM1_RTX (V64QImode)); + tmp2 = gen_reg_rtx (V64QImode); + emit_insn (gen_absv64qi2 (tmp2, tmp1)); + } + else + return false; + break; + + case VEC_BCAST_PADDB: + if (mode == V4SImode && TARGET_SSE2) + { + tmp1 = gen_reg_rtx (V16QImode); + emit_move_insn (tmp1, CONSTM1_RTX (V16QImode)); + tmp2 = gen_reg_rtx (V16QImode); + emit_insn (gen_addv16qi3 (tmp2, tmp1, tmp1)); + } + else if (mode == V8SImode && TARGET_AVX2) + { + tmp1 = gen_reg_rtx (V32QImode); + emit_move_insn (tmp1, CONSTM1_RTX (V32QImode)); + tmp2 = gen_reg_rtx (V32QImode); + emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1)); + } + else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + { + tmp1 = gen_reg_rtx (V64QImode); + emit_move_insn (tmp1, CONSTM1_RTX (V64QImode)); + tmp2 = gen_reg_rtx (V64QImode); + emit_insn (gen_addv64qi3 (tmp2, tmp1, tmp1)); + } + else + return false; + break; + + case VEC_BCAST_PSRLW: + if (mode == V4SImode && TARGET_SSE2) + { + tmp1 = gen_reg_rtx (V8HImode); + emit_move_insn (tmp1, CONSTM1_RTX (V8HImode)); + tmp2 = gen_reg_rtx (V8HImode); + emit_insn (gen_lshrv8hi3 (tmp2, tmp1, GEN_INT (entry->arg))); + } + else if (mode == V8SImode && TARGET_AVX2) + { + tmp1 = gen_reg_rtx (V16HImode); + emit_move_insn (tmp1, CONSTM1_RTX (V16HImode)); + tmp2 = gen_reg_rtx (V16HImode); + emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg))); + } + else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + { + tmp1 = gen_reg_rtx (V32HImode); + emit_move_insn (tmp1, CONSTM1_RTX (V32HImode)); + tmp2 = gen_reg_rtx (V32HImode); + emit_insn (gen_lshrv32hi3 (tmp2, tmp1, GEN_INT (entry->arg))); + } + else + return false; + break; + + case VEC_BCAST_PSRLD: + if (mode == V4SImode && TARGET_SSE2) + { + tmp1 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp1, CONSTM1_RTX (V4SImode)); + emit_insn (gen_lshrv4si3 (target, tmp1, GEN_INT (entry->arg))); + return true; + } + else if (mode == V8SImode && TARGET_AVX2) + { + tmp1 = gen_reg_rtx (V8SImode); + emit_move_insn (tmp1, CONSTM1_RTX (V8SImode)); + emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg))); + return true; + } + else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512) + { + tmp1 = gen_reg_rtx (V16SImode); + emit_move_insn (tmp1, CONSTM1_RTX (V16SImode)); + emit_insn (gen_lshrv16si3 (target, tmp1, GEN_INT (entry->arg))); + return true; + } + else + return false; + break; + + case VEC_BCAST_PSLLW: + if (mode == V4SImode && TARGET_SSE2) + { + tmp1 = gen_reg_rtx (V8HImode); + emit_move_insn (tmp1, CONSTM1_RTX (V8HImode)); + tmp2 = gen_reg_rtx (V8HImode); + emit_insn (gen_ashlv8hi3 (tmp2, tmp1, GEN_INT (entry->arg))); + } + else if (mode == V8SImode && TARGET_AVX2) + { + tmp1 = gen_reg_rtx (V16HImode); + emit_move_insn (tmp1, CONSTM1_RTX (V16HImode)); + tmp2 = gen_reg_rtx (V16HImode); + emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg))); + } + else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512) + { + tmp1 = gen_reg_rtx (V32HImode); + emit_move_insn (tmp1, CONSTM1_RTX (V32HImode)); + tmp2 = gen_reg_rtx (V32HImode); + emit_insn (gen_ashlv32hi3 (tmp2, tmp1, GEN_INT (entry->arg))); + } + else + return false; + break; + + case VEC_BCAST_PSLLD: + if (mode == V4SImode && TARGET_SSE2) + { + tmp1 = gen_reg_rtx (V4SImode); + emit_move_insn (tmp1, CONSTM1_RTX (V4SImode)); + emit_insn (gen_ashlv4si3 (target, tmp1, GEN_INT (entry->arg))); + return true; + } + else if (mode == V8SImode && TARGET_AVX2) + { + tmp1 = gen_reg_rtx (V8SImode); + emit_move_insn (tmp1, CONSTM1_RTX (V8SImode)); + emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg))); + return true; + } + else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512) + { + tmp1 = gen_reg_rtx (V16SImode); + emit_move_insn (tmp1, CONSTM1_RTX (V16SImode)); + emit_insn (gen_ashlv16si3 (target, tmp1, GEN_INT (entry->arg))); + return true; + } + else + return false; + + default: + return false; + } + + emit_move_insn (target, gen_lowpart (mode, tmp2)); + return true; +} + /* A subroutine of ix86_expand_vector_init_duplicate. Tries to fill target with val via vec_duplicate. */ @@ -15705,6 +16031,12 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) bool ok; rtx_insn *insn; rtx dup; + + if ((mode == V4SImode || mode == V8SImode || mode == V16SImode) + && CONST_INT_P (val) + && ix86_vector_duplicate_simode_const (mode, target, INTVAL (val))) + return true; + /* Save/restore recog_data in case this is called from splitters or other routines where recog_data needs to stay valid across force_reg. See PR106577. */ @@ -15801,6 +16133,24 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, } return ix86_vector_duplicate_value (mode, target, val); + case E_V8DImode: + if (CONST_INT_P (val)) + { + int tmp = (int)INTVAL (val); + if (tmp == (int)(INTVAL (val) >> 32)) + { + rtx reg = gen_reg_rtx (V16SImode); + ok = ix86_vector_duplicate_value (V16SImode, reg, + GEN_INT (tmp)); + if (ok) + { + emit_move_insn (target, gen_lowpart (V8DImode, reg)); + return true; + } + } + } + return ix86_vector_duplicate_value (mode, target, val); + case E_V2SImode: case E_V2SFmode: if (!mmx_ok) @@ -15814,7 +16164,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, case E_V4SFmode: case E_V4SImode: case E_V16SImode: - case E_V8DImode: case E_V16SFmode: case E_V8DFmode: return ix86_vector_duplicate_value (mode, target, val); @@ -16019,9 +16368,13 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, return true; case E_V32HImode: + case E_V64QImode: + if (CONST_INT_P (val)) + goto widen; + /* FALLTHRU */ + case E_V32HFmode: case E_V32BFmode: - case E_V64QImode: gcc_assert (TARGET_EVEX512); if (TARGET_AVX512BW) return ix86_vector_duplicate_value (mode, target, val); @@ -17021,6 +17374,13 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) all_same = false; } + /* Handle the zero vector as special case. */ + if (n_var == 0 && all_const_zero) + { + emit_move_insn (target, CONST0_RTX (mode)); + return; + } + /* If all values are identical, broadcast the value. */ if (all_same && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4d6b2b9..e67e5f6 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22134,6 +22134,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, *total = cost->fabs; else if (FLOAT_MODE_P (mode)) *total = ix86_vec_cost (mode, cost->sse_op); + else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + *total = cost->sse_op; return false; case SQRT: diff --git a/gcc/testsuite/gcc.target/i386/auto-init-8.c b/gcc/testsuite/gcc.target/i386/auto-init-8.c index 7023d72..666ee14 100644 --- a/gcc/testsuite/gcc.target/i386/auto-init-8.c +++ b/gcc/testsuite/gcc.target/i386/auto-init-8.c @@ -29,7 +29,7 @@ double foo() return result; } -/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 3 "expand" } } */ +/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 1 "expand" } } */ /* { dg-final { scan-rtl-dump-times "\\\[0xfefefefefefefefe\\\]" 2 "expand" } } */ /* { dg-final { scan-rtl-dump-times "0xfffffffffffffffe\\\]\\\) repeated x16" 2 "expand" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c index f431b8a..1cd9a07 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-13.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-13.c @@ -116,7 +116,6 @@ abs512_ph (__m512h a) return _mm512_abs_ph (a); } -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpandd\[^\n\]*%zmm\[0-9\]+" 1 } } */ __m256h @@ -126,7 +125,6 @@ abs256_ph (__m256h a) return _mm256_abs_ph (a); } -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpand\[^\n\]*%ymm\[0-9\]+" 1 } } */ __m128h @@ -136,5 +134,4 @@ abs_ph (__m128h a) return _mm_abs_ph (a); } -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%xmm\[0-9\]+" 1 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpand\[^\n\]*%xmm\[0-9\]+" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9a.c b/gcc/testsuite/gcc.target/i386/pr100865-9a.c index f2ac1bd..91cfeda 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-9a.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-9a.c @@ -18,7 +18,7 @@ foo (void) { int i; for (i = 0; i < sizeof (array) / sizeof (array[0]); i++) - array[i] = MK_CONST128_BROADCAST (0x1fff); + array[i] = MK_CONST128_BROADCAST (0x1234); } /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101796-1.c b/gcc/testsuite/gcc.target/i386/pr101796-1.c index b25464d..09532f9 100644 --- a/gcc/testsuite/gcc.target/i386/pr101796-1.c +++ b/gcc/testsuite/gcc.target/i386/pr101796-1.c @@ -15,7 +15,7 @@ volatile __m512i a, b; void foo() { - b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (3)); - b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (4)); - b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (5)); + b = _mm512_srlv_epi16 (a, _mm512_set1_epi16 (4)); + b = _mm512_sllv_epi16 (a, _mm512_set1_epi16 (5)); + b = _mm512_srav_epi16 (a, _mm512_set1_epi16 (6)); } diff --git a/gcc/testsuite/gcc.target/i386/pr106060-1.c b/gcc/testsuite/gcc.target/i386/pr106060-1.c new file mode 100644 index 0000000..a734d56 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106060-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64-v3" } */ +#include <immintrin.h> + +__m256i +foo () +{ + /* shouldnt_have_movabs */ + return _mm256_set1_epi8 (123); +} + +/* { dg-final { scan-assembler-not "movabs" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr106060-2.c b/gcc/testsuite/gcc.target/i386/pr106060-2.c new file mode 100644 index 0000000..23933ab --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106060-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64-v3" } */ +#include <immintrin.h> + +__m256i +foo () +{ + /* should_be_cmpeq_abs */ + return _mm256_set1_epi8 (1); +} + +/* { dg-final { scan-assembler "pcmpeq" } } */ +/* { dg-final { scan-assembler "pabsb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr106060-3.c b/gcc/testsuite/gcc.target/i386/pr106060-3.c new file mode 100644 index 0000000..59c128c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr106060-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=x86-64-v3" } */ +#include <immintrin.h> + +__m256i +foo () +{ + /* should_be_cmpeq_add */ + return _mm256_set1_epi8 (-2); +} + +/* { dg-final { scan-assembler "pcmpeq" } } */ +/* { dg-final { scan-assembler "paddb" } } */ + diff --git a/gcc/testsuite/gcc.target/i386/pr70314.c b/gcc/testsuite/gcc.target/i386/pr70314.c index aad8dd9..181d2b4 100644 --- a/gcc/testsuite/gcc.target/i386/pr70314.c +++ b/gcc/testsuite/gcc.target/i386/pr70314.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=skylake-avx512 -O2" } */ -/* { dg-final { scan-assembler-times "cmp" 2 } } */ +/* { dg-final { scan-assembler-times "cmp\[dq\]" 2 } } */ /* { dg-final { scan-assembler-not "and" } } */ typedef long vec __attribute__((vector_size(16))); diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c index c6a6390..b7e45c2 100644 --- a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c +++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c @@ -28,7 +28,7 @@ __vu srl_c (__vu a) return a >> 5; } -/* { dg-final { scan-assembler-times "psrlw" 2 } } */ +/* { dg-final { scan-assembler-times "psrlw" 5 } } */ __vi sra (__vi a, int n) { diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c index 244b0db..2471e6e 100644 --- a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c +++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c @@ -28,7 +28,7 @@ __vu srl_c (__vu a) return a >> 5; } -/* { dg-final { scan-assembler-times "psrlw" 2 } } */ +/* { dg-final { scan-assembler-times "psrlw" 5 } } */ __vi sra (__vi a, int n) { |