diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2021-05-05 15:07:25 +0200 |
---|---|---|
committer | Uros Bizjak <ubizjak@gmail.com> | 2021-05-05 15:09:09 +0200 |
commit | f3661f2d63fbc5fd30c24d22137691e16b0a0a17 (patch) | |
tree | aa5b9c7ad403ea5062d269339bd5459e4f397900 /gcc | |
parent | 29745bf06276b9628d08ef1c9e28890cc56df4aa (diff) | |
download | gcc-f3661f2d63fbc5fd30c24d22137691e16b0a0a17.zip gcc-f3661f2d63fbc5fd30c24d22137691e16b0a0a17.tar.gz gcc-f3661f2d63fbc5fd30c24d22137691e16b0a0a17.tar.bz2 |
i386: Implement integer vector compares for 64bit vectors [PR98218]
Implement integer vector compares for 64-bit vectors when TARGET_MMX_WITH_SSE is enabled.
2021-05-05 Uroš Bizjak <ubizjak@gmail.com>
gcc/
PR target/98218
* config/i386/i386-expand.c (ix86_expand_int_sse_cmp):
Handle V8QI, V4HI and V2SI modes.
* config/i386/i386.c (ix86_build_const_vector): Handle V2SImode.
(ix86_build_signbit_mask): Ditto.
* config/i386/mmx.md (MMXMODE14): New mode iterator.
(<smaxmin:code><MMXMODE14:mode>3): New expander.
(*mmx_<smaxmin:code><MMXMODE14:mode>3): New insn pattern.
(<umaxmin:code><MMXMODE24:mode>3): New expander.
(*mmx_<umaxmin:code><MMXMODE24:mode>3): New insn pattern.
(vec_cmp<MMXMODEI:mode><MMXMODEI:mode>): New expander.
(vec_cmpu<MMXMODEI:mode><MMXMODEI:mode>): Ditto.
(vcond<MMXMODEI:mode><MMXMODEI:mode>): Ditto.
(vcondu<MMXMODEI:mode><MMXMODEI:mode>): Ditto.
(vcond_mask_<MMXMODEI:mode><MMXMODEI:mode>): Ditto.
gcc/testsuite/
PR target/98218
* gcc.target/i386/pr98218-1.c: New test.
* gcc.target/i386/pr98218-1a.c: Ditto.
* gcc.target/i386/pr98218-2.c: Ditto.
* gcc.target/i386/pr98218-2a.c: Ditto.
* gcc.target/i386/pr98218-3.c: Ditto.
* gcc.target/i386/pr98218-3a.c: Ditto.
* gcc.dg/vect/vect-bool-cmp.c (dg-final):
Scan vect tree dump for "LOOP VECTORIZED", not VECTORIZED.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386-expand.c | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 118 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr98218-1.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr98218-1a.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr98218-2.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr98218-2a.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr98218-3.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr98218-3a.c | 19 |
10 files changed, 260 insertions, 1 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index fee4d07..4dfe7d6 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -4204,16 +4204,32 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, else if (code == GT && TARGET_SSE4_1) gen = gen_sminv16qi3; break; + case E_V8QImode: + if (code == GTU && TARGET_SSE2) + gen = gen_uminv8qi3; + else if (code == GT && TARGET_SSE4_1) + gen = gen_sminv8qi3; + break; case E_V8HImode: if (code == GTU && TARGET_SSE4_1) gen = gen_uminv8hi3; else if (code == GT && TARGET_SSE2) gen = gen_sminv8hi3; break; + case E_V4HImode: + if (code == GTU && TARGET_SSE4_1) + gen = gen_uminv4hi3; + else if (code == GT && TARGET_SSE2) + gen = gen_sminv4hi3; + break; case E_V4SImode: if (TARGET_SSE4_1) gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3; break; + case E_V2SImode: + if (TARGET_SSE4_1) + gen = (code == GTU) ? gen_uminv2si3 : gen_sminv2si3; + break; case E_V2DImode: if (TARGET_AVX512VL) { @@ -4254,6 +4270,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, case E_V8SImode: case E_V4DImode: case E_V4SImode: + case E_V2SImode: case E_V2DImode: { rtx t1, t2, mask; @@ -4278,7 +4295,9 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, case E_V32QImode: case E_V16HImode: case E_V16QImode: + case E_V8QImode: case E_V8HImode: + case E_V4HImode: /* Perform a parallel unsigned saturating subtraction. 
*/ x = gen_reg_rtx (mode); emit_insn (gen_rtx_SET diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 780da10..06b0f58 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -15284,6 +15284,7 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value) case E_V16SImode: case E_V8SImode: case E_V4SImode: + case E_V2SImode: case E_V8DImode: case E_V4DImode: case E_V2DImode: @@ -15334,6 +15335,7 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) case E_V8SFmode: case E_V4SFmode: case E_V2SFmode: + case E_V2SImode: vec_mode = mode; imode = SImode; break; diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 4c2b724..347295a 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -52,6 +52,7 @@ ;; Mix-n-match (define_mode_iterator MMXMODE12 [V8QI V4HI]) +(define_mode_iterator MMXMODE14 [V8QI V2SI]) (define_mode_iterator MMXMODE24 [V4HI V2SI]) (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI]) @@ -1417,6 +1418,31 @@ (set_attr "type" "mmxmul,ssemul,ssemul") (set_attr "mode" "DI,TI,TI")]) +(define_expand "<code><mode>3" + [(set (match_operand:MMXMODE14 0 "register_operand") + (smaxmin:MMXMODE14 + (match_operand:MMXMODE14 1 "register_operand") + (match_operand:MMXMODE14 2 "register_operand")))] + "TARGET_MMX_WITH_SSE && TARGET_SSE4_1" + "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") + +(define_insn "*mmx_<code><mode>3" + [(set (match_operand:MMXMODE14 0 "register_operand" "=Yr,*x,Yv") + (smaxmin:MMXMODE14 + (match_operand:MMXMODE14 1 "register_operand" "%0,0,Yv") + (match_operand:MMXMODE14 2 "register_operand" "Yr,*x,Yv")))] + "TARGET_MMX_WITH_SSE && TARGET_SSE4_1 + && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + "@ + p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2} + p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2} + vp<maxmin_int><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") + (set_attr "prefix_extra" 
"1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) + (define_expand "mmx_<code>v4hi3" [(set (match_operand:V4HI 0 "register_operand") (smaxmin:V4HI @@ -1451,6 +1477,31 @@ (set_attr "type" "mmxadd,sseiadd,sseiadd") (set_attr "mode" "DI,TI,TI")]) +(define_expand "<code><mode>3" + [(set (match_operand:MMXMODE24 0 "register_operand") + (umaxmin:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand") + (match_operand:MMXMODE24 2 "register_operand")))] + "TARGET_MMX_WITH_SSE && TARGET_SSE4_1" + "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") + +(define_insn "*mmx_<code><mode>3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=Yr,*x,Yv") + (umaxmin:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "%0,0,Yv") + (match_operand:MMXMODE24 2 "register_operand" "Yr,*x,Yv")))] + "TARGET_MMX_WITH_SSE && TARGET_SSE4_1 + && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" + "@ + p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2} + p<maxmin_int><mmxvecsize>\t{%2, %0|%0, %2} + vp<maxmin_int><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sseiadd") + (set_attr "prefix_extra" "1,1,*") + (set_attr "prefix" "orig,orig,vex") + (set_attr "mode" "TI")]) + (define_expand "mmx_<code>v8qi3" [(set (match_operand:V8QI 0 "register_operand") (umaxmin:V8QI @@ -1582,6 +1633,73 @@ (set_attr "type" "mmxcmp,ssecmp,ssecmp") (set_attr "mode" "DI,TI,TI")]) +(define_expand "vec_cmp<mode><mode>" + [(set (match_operand:MMXMODEI 0 "register_operand") + (match_operator:MMXMODEI 1 "" + [(match_operand:MMXMODEI 2 "register_operand") + (match_operand:MMXMODEI 3 "register_operand")]))] + "TARGET_MMX_WITH_SSE" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:MMXMODEI 0 "register_operand") + (match_operator:MMXMODEI 1 "" + [(match_operand:MMXMODEI 2 "register_operand") + (match_operand:MMXMODEI 3 
"register_operand")]))] + "TARGET_MMX_WITH_SSE" +{ + bool ok = ix86_expand_int_vec_cmp (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vcond<mode><mode>" + [(set (match_operand:MMXMODEI 0 "register_operand") + (if_then_else:MMXMODEI + (match_operator 3 "" + [(match_operand:MMXMODEI 4 "register_operand") + (match_operand:MMXMODEI 5 "register_operand")]) + (match_operand:MMXMODEI 1) + (match_operand:MMXMODEI 2)))] + "TARGET_MMX_WITH_SSE" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vcondu<mode><mode>" + [(set (match_operand:MMXMODEI 0 "register_operand") + (if_then_else:MMXMODEI + (match_operator 3 "" + [(match_operand:MMXMODEI 4 "register_operand") + (match_operand:MMXMODEI 5 "register_operand")]) + (match_operand:MMXMODEI 1) + (match_operand:MMXMODEI 2)))] + "TARGET_MMX_WITH_SSE" +{ + bool ok = ix86_expand_int_vcond (operands); + gcc_assert (ok); + DONE; +}) + +(define_expand "vcond_mask_<mode><mode>" + [(set (match_operand:MMXMODEI 0 "register_operand") + (vec_merge:MMXMODEI + (match_operand:MMXMODEI 1 "register_operand") + (match_operand:MMXMODEI 2 "register_operand") + (match_operand:MMXMODEI 3 "register_operand")))] + "TARGET_MMX_WITH_SSE" +{ + ix86_expand_sse_movcc (operands[0], operands[3], + operands[1], operands[2]); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral logical operations diff --git a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c index 35d2a3c..c97da52 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c +++ b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp.c @@ -253,4 +253,4 @@ main (int argc, char **argv) check (res, ne); } -/* { dg-final { scan-tree-dump-times "VECTORIZED" 18 "vect" { target sse4_runtime } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 18 "vect" { target sse4_runtime } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1.c 
b/gcc/testsuite/gcc.target/i386/pr98218-1.c new file mode 100644 index 0000000..48407da --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98218-1.c @@ -0,0 +1,21 @@ +/* PR target/98522 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -msse2" } */ + +typedef char vec __attribute__((vector_size(8))); + +vec lt (vec a, vec b) { return a < b; } +vec le (vec a, vec b) { return a <= b; } +vec eq (vec a, vec b) { return a == b; } +vec ne (vec a, vec b) { return a != b; } +vec ge (vec a, vec b) { return a >= b; } +vec gt (vec a, vec b) { return a > b; } + +typedef unsigned char uvec __attribute__((vector_size(8))); + +vec ltu (uvec a, uvec b) { return a < b; } +vec leu (uvec a, uvec b) { return a <= b; } +vec geu (uvec a, uvec b) { return a >= b; } +vec gtu (uvec a, uvec b) { return a > b; } + +/* { dg-final { scan-assembler-not "cmpb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1a.c b/gcc/testsuite/gcc.target/i386/pr98218-1a.c new file mode 100644 index 0000000..3470c87 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98218-1a.c @@ -0,0 +1,19 @@ +/* PR target/98522 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +typedef char T; + +#define M 8 + +extern T a[M], b[M], s1[M], s2[M], r[M]; + +void foo (void) +{ + int j; + + for (j = 0; j < M; j++) + r[j] = (a[j] < b[j]) ? s1[j] : s2[j]; +} + +/* { dg-final { scan-assembler "pcmpgtb" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2.c b/gcc/testsuite/gcc.target/i386/pr98218-2.c new file mode 100644 index 0000000..0b71612 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98218-2.c @@ -0,0 +1,21 @@ +/* PR target/98522 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -msse2" } */ + +typedef short vec __attribute__((vector_size(8))); + +vec lt (vec a, vec b) { return a < b; } +vec le (vec a, vec b) { return a <= b; } +vec eq (vec a, vec b) { return a == b; } +vec ne (vec a, vec b) { return a != b; } +vec ge (vec a, vec b) { return a >= b; } +vec gt (vec a, vec b) { return a > b; } + +typedef unsigned short uvec __attribute__((vector_size(8))); + +vec ltu (uvec a, uvec b) { return a < b; } +vec leu (uvec a, uvec b) { return a <= b; } +vec geu (uvec a, uvec b) { return a >= b; } +vec gtu (uvec a, uvec b) { return a > b; } + +/* { dg-final { scan-assembler-not "cmpw" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2a.c b/gcc/testsuite/gcc.target/i386/pr98218-2a.c new file mode 100644 index 0000000..6afd0a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98218-2a.c @@ -0,0 +1,19 @@ +/* PR target/98522 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +typedef short T; + +#define M 4 + +extern T a[M], b[M], s1[M], s2[M], r[M]; + +void foo (void) +{ + int j; + + for (j = 0; j < M; j++) + r[j] = (a[j] < b[j]) ? s1[j] : s2[j]; +} + +/* { dg-final { scan-assembler "pcmpgtw" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3.c b/gcc/testsuite/gcc.target/i386/pr98218-3.c new file mode 100644 index 0000000..83a8c29 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98218-3.c @@ -0,0 +1,21 @@ +/* PR target/98522 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -msse2" } */ + +typedef int vec __attribute__((vector_size(8))); + +vec lt (vec a, vec b) { return a < b; } +vec le (vec a, vec b) { return a <= b; } +vec eq (vec a, vec b) { return a == b; } +vec ne (vec a, vec b) { return a != b; } +vec ge (vec a, vec b) { return a >= b; } +vec gt (vec a, vec b) { return a > b; } + +typedef unsigned int uvec __attribute__((vector_size(8))); + +vec ltu (uvec a, uvec b) { return a < b; } +vec leu (uvec a, uvec b) { return a <= b; } +vec geu (uvec a, uvec b) { return a >= b; } +vec gtu (uvec a, uvec b) { return a > b; } + +/* { dg-final { scan-assembler-not "cmpl" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3a.c b/gcc/testsuite/gcc.target/i386/pr98218-3a.c new file mode 100644 index 0000000..272d54e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98218-3a.c @@ -0,0 +1,19 @@ +/* PR target/98522 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +typedef int T; + +#define M 2 + +extern T a[M], b[M], s1[M], s2[M], r[M]; + +void foo (void) +{ + int j; + + for (j = 0; j < M; j++) + r[j] = (a[j] < b[j]) ? s1[j] : s2[j]; +} + +/* { dg-final { scan-assembler "pcmpgtd" { xfail *-*-* } } } */ |