aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2021-05-12 08:11:18 +0200
committer Uros Bizjak <ubizjak@gmail.com> 2021-05-12 08:15:33 +0200
commit b1f7fd8a2a5558da1e101de11bb1cdba081ce010 (patch)
tree ca1920e4aafa057a0a10a834023cb00ab162edc0 /gcc
parent 71d38ec80008afdbb9a059253407d80598b765c0 (diff)
download gcc-b1f7fd8a2a5558da1e101de11bb1cdba081ce010.zip
gcc-b1f7fd8a2a5558da1e101de11bb1cdba081ce010.tar.gz
gcc-b1f7fd8a2a5558da1e101de11bb1cdba081ce010.tar.bz2
i386: Implement FP vector compares for V2SFmode [PR98218]
Implement FP vector compares for V2SFmode for TARGET_MMX_WITH_SSE. 2021-05-12 Uroš Bizjak <ubizjak@gmail.com> gcc/ PR target/98218 * config/i386/i386-expand.c (ix86_expand_sse_movcc): Handle V2SF mode. * config/i386/mmx.md (MMXMODE124): New mode iterator. (V2FI): Ditto. (mmxintvecmode): New mode attribute. (mmxintvecmodelower): Ditto. (*mmx_maskcmpv2sf3_comm): New insn pattern. (*mmx_maskcmpv2sf3): Ditto. (vec_cmpv2sfv2si): New expander. (vcond<V2FI:mode>v2sf): Ditto. (mmx_blendvps): New insn pattern. (vcond<MMXMODE124:mode><MMXMODEI:mode>): Also handle V2SFmode. (vcondu<MMXMODE124:mode><MMXMODEI:mode>): Ditto. (vcond_mask_<mode><mmxintvecmodelower>): Ditto. gcc/testsuite/ PR target/98218 * g++.target/i386/pr98218-1.C: New test. * gcc.target/i386/pr98218-4.c: Ditto. * gcc.target/i386/pr98218-1.c: Correct PR number. * gcc.target/i386/pr98218-1a.c: Ditto. * gcc.target/i386/pr98218-2.c: Ditto. * gcc.target/i386/pr98218-2a.c: Ditto. * gcc.target/i386/pr98218-3.c: Ditto. * gcc.target/i386/pr98218-3a.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386-expand.c 7
-rw-r--r-- gcc/config/i386/mmx.md 130
-rw-r--r-- gcc/testsuite/g++.target/i386/pr98218-1.C 20
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr98218-1.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr98218-1a.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr98218-2.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr98218-2a.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr98218-3.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr98218-3a.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr98218-4.c 16
10 files changed, 161 insertions, 24 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 5cfde5b..dd23008 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3680,6 +3680,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
switch (mode)
{
+ case E_V2SFmode:
+ if (TARGET_SSE4_1)
+ {
+ gen = gen_mmx_blendvps;
+ op_true = force_reg (mode, op_true);
+ }
+ break;
case E_V4SFmode:
if (TARGET_SSE4_1)
gen = gen_sse4_1_blendvps;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f085708..d433c52 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -49,6 +49,7 @@
;; All 8-byte vector modes handled by MMX
(define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
+(define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
;; Mix-n-match
(define_mode_iterator MMXMODE12 [V8QI V4HI])
@@ -56,12 +57,22 @@
(define_mode_iterator MMXMODE24 [V4HI V2SI])
(define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
+;; All V2S* modes
+(define_mode_iterator V2FI [V2SF V2SI])
+
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
(define_mode_attr mmxdoublemode
[(V8QI "V8HI") (V4HI "V4SI")])
+;; Mapping of vector float modes to an integer mode of the same size
+(define_mode_attr mmxintvecmode
+ [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")])
+
+(define_mode_attr mmxintvecmodelower
+ [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")])
+
(define_mode_attr Yv_Yw
[(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
@@ -714,6 +725,85 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
+(define_insn "*mmx_maskcmpv2sf3_comm"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x")
+ (match_operator:V2SF 3 "sse_comparison_operator"
+ [(match_operand:V2SF 1 "register_operand" "%0,x")
+ (match_operand:V2SF 2 "register_operand" "x,x")]))]
+ "TARGET_MMX_WITH_SSE
+ && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
+ "@
+ cmp%D3ps\t{%2, %0|%0, %2}
+ vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*mmx_maskcmpv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x")
+ (match_operator:V2SF 3 "sse_comparison_operator"
+ [(match_operand:V2SF 1 "register_operand" "0,x")
+ (match_operand:V2SF 2 "register_operand" "x,x")]))]
+ "TARGET_MMX_WITH_SSE"
+ "@
+ cmp%D3ps\t{%2, %0|%0, %2}
+ vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V4SF")])
+
+(define_expand "vec_cmpv2sfv2si"
+ [(set (match_operand:V2SI 0 "register_operand")
+ (match_operator:V2SI 1 ""
+ [(match_operand:V2SF 2 "register_operand")
+ (match_operand:V2SF 3 "register_operand")]))]
+ "TARGET_MMX_WITH_SSE"
+{
+ bool ok = ix86_expand_fp_vec_cmp (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_expand "vcond<mode>v2sf"
+ [(set (match_operand:V2FI 0 "register_operand")
+ (if_then_else:V2FI
+ (match_operator 3 ""
+ [(match_operand:V2SF 4 "register_operand")
+ (match_operand:V2SF 5 "register_operand")])
+ (match_operand:V2FI 1)
+ (match_operand:V2FI 2)))]
+ "TARGET_MMX_WITH_SSE"
+{
+ bool ok = ix86_expand_fp_vcond (operands);
+ gcc_assert (ok);
+ DONE;
+})
+
+(define_insn "mmx_blendvps"
+ [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
+ (unspec:V2SF
+ [(match_operand:V2SF 1 "register_operand" "0,0,x")
+ (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
+ (match_operand:V2SF 3 "register_operand" "Yz,Yz,x")]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+ "@
+ blendvps\t{%3, %2, %0|%0, %2, %3}
+ blendvps\t{%3, %2, %0|%0, %2, %3}
+ vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "noavx,noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix_data16" "1,1,*")
+ (set_attr "prefix_extra" "1")
+ (set_attr "prefix" "orig,orig,vex")
+ (set_attr "btver2_decode" "vector")
+ (set_attr "mode" "V4SF")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point logical operations
@@ -1657,42 +1747,46 @@
DONE;
})
-(define_expand "vcond<mode><mode>"
- [(set (match_operand:MMXMODEI 0 "register_operand")
- (if_then_else:MMXMODEI
+(define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
+ [(set (match_operand:MMXMODE124 0 "register_operand")
+ (if_then_else:MMXMODE124
(match_operator 3 ""
[(match_operand:MMXMODEI 4 "register_operand")
(match_operand:MMXMODEI 5 "register_operand")])
- (match_operand:MMXMODEI 1)
- (match_operand:MMXMODEI 2)))]
- "TARGET_MMX_WITH_SSE"
+ (match_operand:MMXMODE124 1)
+ (match_operand:MMXMODE124 2)))]
+ "TARGET_MMX_WITH_SSE
+ && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
+ == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
{
bool ok = ix86_expand_int_vcond (operands);
gcc_assert (ok);
DONE;
})
-(define_expand "vcondu<mode><mode>"
- [(set (match_operand:MMXMODEI 0 "register_operand")
- (if_then_else:MMXMODEI
+(define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
+ [(set (match_operand:MMXMODE124 0 "register_operand")
+ (if_then_else:MMXMODE124
(match_operator 3 ""
[(match_operand:MMXMODEI 4 "register_operand")
(match_operand:MMXMODEI 5 "register_operand")])
- (match_operand:MMXMODEI 1)
- (match_operand:MMXMODEI 2)))]
- "TARGET_MMX_WITH_SSE"
+ (match_operand:MMXMODE124 1)
+ (match_operand:MMXMODE124 2)))]
+ "TARGET_MMX_WITH_SSE
+ && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
+ == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
{
bool ok = ix86_expand_int_vcond (operands);
gcc_assert (ok);
DONE;
})
-(define_expand "vcond_mask_<mode><mode>"
- [(set (match_operand:MMXMODEI 0 "register_operand")
- (vec_merge:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand")
- (match_operand:MMXMODEI 2 "register_operand")
- (match_operand:MMXMODEI 3 "register_operand")))]
+(define_expand "vcond_mask_<mode><mmxintvecmodelower>"
+ [(set (match_operand:MMXMODE124 0 "register_operand")
+ (vec_merge:MMXMODE124
+ (match_operand:MMXMODE124 1 "register_operand")
+ (match_operand:MMXMODE124 2 "register_operand")
+ (match_operand:<mmxintvecmode> 3 "register_operand")))]
"TARGET_MMX_WITH_SSE"
{
ix86_expand_sse_movcc (operands[0], operands[3],
diff --git a/gcc/testsuite/g++.target/i386/pr98218-1.C b/gcc/testsuite/g++.target/i386/pr98218-1.C
new file mode 100644
index 0000000..61ea4bf
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr98218-1.C
@@ -0,0 +1,20 @@
+/* PR target/98218 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned int __attribute__((__vector_size__ (8))) v64u32;
+typedef int __attribute__((__vector_size__ (8))) v64s32;
+typedef float __attribute__((__vector_size__ (8))) v64f32;
+
+v64u32 au, bu;
+v64s32 as, bs;
+v64f32 af, bf;
+
+v64u32 tu (v64f32 a, v64f32 b) { return (a > b) ? au : bu; }
+v64s32 ts (v64f32 a, v64f32 b) { return (a > b) ? as : bs; }
+v64f32 fu (v64u32 a, v64u32 b) { return (a > b) ? af : bf; }
+v64f32 fs (v64s32 a, v64s32 b) { return (a > b) ? af : bf; }
+v64f32 ff (v64f32 a, v64f32 b) { return (a > b) ? af : bf; }
+
+/* { dg-final { scan-assembler-times "cmpltps" 3 } } */
+/* { dg-final { scan-assembler-times "pcmpgtd" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1.c b/gcc/testsuite/gcc.target/i386/pr98218-1.c
index 48407da..9d6602c 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-1.c
@@ -1,4 +1,4 @@
-/* PR target/98522 */
+/* PR target/98218 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -msse2" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1a.c b/gcc/testsuite/gcc.target/i386/pr98218-1a.c
index 3470c87..2610438 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-1a.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-1a.c
@@ -1,4 +1,4 @@
-/* PR target/98522 */
+/* PR target/98218 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -ftree-vectorize -msse2" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2.c b/gcc/testsuite/gcc.target/i386/pr98218-2.c
index 0b71612..948bf4f 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-2.c
@@ -1,4 +1,4 @@
-/* PR target/98522 */
+/* PR target/98218 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -msse2" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2a.c b/gcc/testsuite/gcc.target/i386/pr98218-2a.c
index 6afd0a4..73c7226 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-2a.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-2a.c
@@ -1,4 +1,4 @@
-/* PR target/98522 */
+/* PR target/98218 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -ftree-vectorize -msse2" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3.c b/gcc/testsuite/gcc.target/i386/pr98218-3.c
index 83a8c29..1b40d0ce 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-3.c
@@ -1,4 +1,4 @@
-/* PR target/98522 */
+/* PR target/98218 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -msse2" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3a.c b/gcc/testsuite/gcc.target/i386/pr98218-3a.c
index 272d54e..cf1d497 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-3a.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-3a.c
@@ -1,4 +1,4 @@
-/* PR target/98522 */
+/* PR target/98218 */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -ftree-vectorize -msse2" } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-4.c b/gcc/testsuite/gcc.target/i386/pr98218-4.c
new file mode 100644
index 0000000..647bdb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98218-4.c
@@ -0,0 +1,16 @@
+/* PR target/98218 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned int __attribute__((__vector_size__ (8))) v64u32;
+typedef int __attribute__((__vector_size__ (8))) v64s32;
+typedef float __attribute__((__vector_size__ (8))) v64f32;
+
+v64u32 tu (v64f32 a, v64f32 b) { return a > b; }
+v64s32 ts (v64f32 a, v64f32 b) { return a > b; }
+v64f32 fu (v64u32 a, v64u32 b) { return a > b; }
+v64f32 fs (v64s32 a, v64s32 b) { return a > b; }
+v64f32 ff (v64f32 a, v64f32 b) { return a > b; }
+
+/* { dg-final { scan-assembler-times "cmpltps" 3 } } */
+/* { dg-final { scan-assembler-times "pcmpgtd" 2 } } */