aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHu, Lin1 <lin1.hu@intel.com>2024-03-20 16:01:45 +0800
committerHu, Lin1 <lin1.hu@intel.com>2024-06-18 09:09:39 +0800
commit7c6f79eea9febce3b21c5783bac9b0a36e08f003 (patch)
tree0deee4462c88369433cdf46fdaf89a9b1f37d2d7
parent31e275d7d763d3fd3f3dd0a20c54e46dd1c9254d (diff)
downloadgcc-7c6f79eea9febce3b21c5783bac9b0a36e08f003.zip
gcc-7c6f79eea9febce3b21c5783bac9b0a36e08f003.tar.gz
gcc-7c6f79eea9febce3b21c5783bac9b0a36e08f003.tar.bz2
i386: Handle target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2/avx
gcc/ChangeLog: * config/i386/avxintrin.h: Move cmp[p|s][s|d] to [e|x]mmintrin.h, and move macros to xmmintrin.h * config/i386/emmintrin.h: Add cmp[p|s]s intrins. * config/i386/i386-builtin.def: Modify __builtin_ia32_cmp[p|s][s|d]. * config/i386/i386-expand.cc (ix86_expand_args_builtin): Raise error when imm is in range of [8, 32] without avx. * config/i386/predicates.md (cmpps_imm_operand): New predicate. * config/i386/sse.md (avx_cmp<mode>3): Modefy define_insn. (avx_vmcmp<mode>3): Ditto. * config/i386/xmmintrin.h (_CMP_EQ_OQ): New macro for sse/sse2. (_CMP_LT_OS): Ditto (_CMP_LE_OS): Ditto (_CMP_UNORD_Q): Ditto (_CMP_NEQ_UQ): Ditto (_CMP_NLT_US): Ditto (_CMP_NLE_US): Ditto (_CMP_ORD_Q): Ditto (_mm_cmp_ps): Move intrin from avxintrin.h to xmmintrin.h (_mm_cmp_ss): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/sse-cmp-1.c: New test. * gcc.target/i386/sse-cmp-2.c: Ditto. * gcc.target/i386/sse-cmp-error.c: Ditto.
-rw-r--r--gcc/config/i386/avxintrin.h56
-rw-r--r--gcc/config/i386/emmintrin.h22
-rw-r--r--gcc/config/i386/i386-builtin.def10
-rw-r--r--gcc/config/i386/i386-expand.cc6
-rw-r--r--gcc/config/i386/predicates.md5
-rw-r--r--gcc/config/i386/sse.md42
-rw-r--r--gcc/config/i386/xmmintrin.h41
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-cmp-1.c20
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-cmp-2.c96
-rw-r--r--gcc/testsuite/gcc.target/i386/sse-cmp-error.c16
10 files changed, 236 insertions, 78 deletions
diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index 8021454..ec9b990 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -72,22 +72,6 @@ typedef double __m256d_u __attribute__ ((__vector_size__ (32),
/* Compare predicates for scalar and packed compare intrinsics. */
-/* Equal (ordered, non-signaling) */
-#define _CMP_EQ_OQ 0x00
-/* Less-than (ordered, signaling) */
-#define _CMP_LT_OS 0x01
-/* Less-than-or-equal (ordered, signaling) */
-#define _CMP_LE_OS 0x02
-/* Unordered (non-signaling) */
-#define _CMP_UNORD_Q 0x03
-/* Not-equal (unordered, non-signaling) */
-#define _CMP_NEQ_UQ 0x04
-/* Not-less-than (unordered, signaling) */
-#define _CMP_NLT_US 0x05
-/* Not-less-than-or-equal (unordered, signaling) */
-#define _CMP_NLE_US 0x06
-/* Ordered (nonsignaling) */
-#define _CMP_ORD_Q 0x07
/* Equal (unordered, non-signaling) */
#define _CMP_EQ_UQ 0x08
/* Not-greater-than-or-equal (unordered, signaling) */
@@ -381,18 +365,6 @@ _mm256_xor_ps (__m256 __A, __m256 __B)
}
#ifdef __OPTIMIZE__
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
-{
- return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
-{
- return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
-}
-
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_pd (__m256d __X, __m256d __Y, const int __P)
{
@@ -406,27 +378,7 @@ _mm256_cmp_ps (__m256 __X, __m256 __Y, const int __P)
return (__m256) __builtin_ia32_cmpps256 ((__v8sf)__X, (__v8sf)__Y,
__P);
}
-
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
-{
- return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
-}
-
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
-{
- return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
-}
#else
-#define _mm_cmp_pd(X, Y, P) \
- ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ps(X, Y, P) \
- ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P)))
-
#define _mm256_cmp_pd(X, Y, P) \
((__m256d) __builtin_ia32_cmppd256 ((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), (int)(P)))
@@ -434,14 +386,6 @@ _mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
#define _mm256_cmp_ps(X, Y, P) \
((__m256) __builtin_ia32_cmpps256 ((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), (int)(P)))
-
-#define _mm_cmp_sd(X, Y, P) \
- ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
- (__v2df)(__m128d)(Y), (int)(P)))
-
-#define _mm_cmp_ss(X, Y, P) \
- ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
- (__v4sf)(__m128)(Y), (int)(P)))
#endif
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 356ca21..d58030e 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -1391,6 +1391,28 @@ _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
}
#ifdef __OPTIMIZE__
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_pd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmppd ((__v2df)__X, (__v2df)__Y, __P);
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_sd (__m128d __X, __m128d __Y, const int __P)
+{
+ return (__m128d) __builtin_ia32_cmpsd ((__v2df)__X, (__v2df)__Y, __P);
+}
+#else
+#define _mm_cmp_pd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmppd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+
+#define _mm_cmp_sd(X, Y, P) \
+ ((__m128d) __builtin_ia32_cmpsd ((__v2df)(__m128d)(X), \
+ (__v2df)(__m128d)(Y), (int)(P)))
+#endif
+
+#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index edb1d2f..e876e7f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -671,6 +671,9 @@ BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpn
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
+
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
@@ -827,6 +830,9 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtb128", I
BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_nothing, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI )
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
+
BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
@@ -1077,10 +1083,6 @@ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvp
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
-BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT)
BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 7c6a82e..5c29ee1 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -11835,6 +11835,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_avx_vmcmpv4sf3:
case CODE_FOR_avx_cmpv2df3:
case CODE_FOR_avx_cmpv4sf3:
+ if (CONST_INT_P (op) && IN_RANGE (INTVAL (op), 8, 31))
+ {
+ error ("'%s' needs isa option %s", d->name, "-mavx");
+ return const0_rtx;
+ }
+ /* FALLTHRU */
case CODE_FOR_avx_cmpv4df3:
case CODE_FOR_avx_cmpv8sf3:
case CODE_FOR_avx512f_cmpv8df3_mask:
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 1676c50..a894847 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1056,6 +1056,11 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 28, 31)")))
+(define_predicate "cmpps_imm_operand"
+ (ior (match_operand 0 "const_0_to_7_operand")
+ (and (match_test "TARGET_AVX")
+ (match_operand 0 "const_0_to_31_operand"))))
+
;; True if this is a constant appropriate for an increment or decrement.
(define_predicate "incdec_operand"
(match_code "const_int")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 831bdf8..0be2dcd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3734,18 +3734,21 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "avx_cmp<mode>3"
- [(set (match_operand:VF_128_256 0 "register_operand" "=x")
+ [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
(unspec:VF_128_256
- [(match_operand:VF_128_256 1 "register_operand" "x")
- (match_operand:VF_128_256 2 "nonimmediate_operand" "xjm")
- (match_operand:SI 3 "const_0_to_31_operand")]
+ [(match_operand:VF_128_256 1 "register_operand" "0,x")
+ (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xjm")
+ (match_operand:SI 3 "cmpps_imm_operand")]
UNSPEC_PCMP))]
- "TARGET_AVX"
- "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssecmp")
- (set_attr "addr" "gpr16")
+ "TARGET_SSE"
+ "@
+ cmp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssecmp")
+ (set_attr "addr" "*,gpr16")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*avx_cmp<mode>3_1"
@@ -3945,21 +3948,24 @@
})
(define_insn "avx_vmcmp<mode>3"
- [(set (match_operand:VF_128 0 "register_operand" "=x")
+ [(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(unspec:VF_128
- [(match_operand:VF_128 1 "register_operand" "x")
- (match_operand:VF_128 2 "nonimmediate_operand" "xjm")
- (match_operand:SI 3 "const_0_to_31_operand")]
+ [(match_operand:VF_128 1 "register_operand" "0,x")
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,xjm")
+ (match_operand:SI 3 "cmpps_imm_operand")]
UNSPEC_PCMP)
(match_dup 1)
(const_int 1)))]
- "TARGET_AVX"
- "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
- [(set_attr "type" "ssecmp")
- (set_attr "addr" "gpr16")
+ "TARGET_SSE"
+ "@
+ cmp<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
+ vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssecmp")
+ (set_attr "addr" "*,gpr16")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "orig,vex")
(set_attr "mode" "<ssescalarmode>")])
(define_insn "*<sse>_maskcmp<mode>3_comm"
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index c90fc71..37e5a94cf 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -108,6 +108,25 @@ typedef float __v4sf __attribute__ ((__vector_size__ (16)));
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
+/* Compare predicates for scalar and packed compare intrinsics. */
+
+/* Equal (ordered, non-signaling) */
+#define _CMP_EQ_OQ 0x00
+/* Less-than (ordered, signaling) */
+#define _CMP_LT_OS 0x01
+/* Less-than-or-equal (ordered, signaling) */
+#define _CMP_LE_OS 0x02
+/* Unordered (non-signaling) */
+#define _CMP_UNORD_Q 0x03
+/* Not-equal (unordered, non-signaling) */
+#define _CMP_NEQ_UQ 0x04
+/* Not-less-than (unordered, signaling) */
+#define _CMP_NLT_US 0x05
+/* Not-less-than-or-equal (unordered, signaling) */
+#define _CMP_NLE_US 0x06
+/* Ordered (nonsignaling) */
+#define _CMP_ORD_Q 0x07
+
/* Create an undefined vector. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_ps (void)
@@ -434,6 +453,28 @@ _mm_cmpunord_ps (__m128 __A, __m128 __B)
return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B);
}
+#ifdef __OPTIMIZE__
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ps (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpps ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cmp_ss (__m128 __X, __m128 __Y, const int __P)
+{
+ return (__m128) __builtin_ia32_cmpss ((__v4sf)__X, (__v4sf)__Y, __P);
+}
+#else
+#define _mm_cmp_ps(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpps ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+
+#define _mm_cmp_ss(X, Y, P) \
+ ((__m128) __builtin_ia32_cmpss ((__v4sf)(__m128)(X), \
+ (__v4sf)(__m128)(Y), (int)(P)))
+#endif
+
/* Compare the lower SPFP values of A and B and return 1 if true
and 0 if false. */
diff --git a/gcc/testsuite/gcc.target/i386/sse-cmp-1.c b/gcc/testsuite/gcc.target/i386/sse-cmp-1.c
new file mode 100644
index 0000000..eff90d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-cmp-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -msse2" } */
+/* { dg-final { scan-assembler-times "cmpsd" 1 } } */
+/* { dg-final { scan-assembler-times "cmpss" 1 } } */
+/* { dg-final { scan-assembler-times "cmppd" 1 } } */
+/* { dg-final { scan-assembler-times "cmpps" 1 } } */
+
+#include <x86intrin.h>
+
+__m128 a1, a2, a3, a4, a5, a6;
+__m128d d1, d2, d3, d4, d5, d6;
+
+void
+test (void)
+{
+ d1 = _mm_cmp_sd (d2, d3, 1);
+ a1 = _mm_cmp_ss (a2, a3, 2);
+ d1 = _mm_cmp_pd (d2, d3, 3);
+ a1 = _mm_cmp_ps (a2, a3, 4);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-cmp-2.c b/gcc/testsuite/gcc.target/i386/sse-cmp-2.c
new file mode 100644
index 0000000..77c05c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-cmp-2.c
@@ -0,0 +1,96 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-require-effective-target c99_runtime } */
+/* { dg-options "-O2 -msse2 -std=c99" } */
+
+#include "sse2-check.h"
+#include "emmintrin.h"
+#include <math.h>
+
+double sd1[2]={2134.3343,6678.346};
+double sd2[2]={41124.234,6678.346};
+
+float ss1[4]={2134.3343,6678.346,453.345635,54646.464356};
+float ss2[4]={41124.234,6678.346,8653.65635,856.43576};
+
+union
+{
+ double x[2];
+ long long a[2];
+}dd, ed;
+
+union
+{
+ float x[4];
+ int a[4];
+}ds, es;
+
+#define CHECK(INTSIZE, SIZE, NUNITS, SUFFIX) \
+void check##SUFFIX(unsigned imm, char *id) \
+{ \
+ if(checkV##INTSIZE(d##SIZE.a, e##SIZE.a, NUNITS)){ \
+ printf("mm_cmp_" #SUFFIX "(%s: 0x%x) FAILED\n", id, imm);\
+ abort(); \
+ } \
+}
+
+CHECK (l, d, 2, pd)
+CHECK (i, s, 4, ps)
+CHECK (l, d, 2, sd)
+CHECK (i, s, 4, ss)
+
+#define CMP(imm, rel, SIZE, NUNITS, SUFFIX) \
+ for (i = 0; i < NUNITS; i++) e##SIZE.a[i] = rel ? -1 : 0; \
+ source##SIZE##1 = _mm_loadu_p##SIZE(s##SIZE##1); \
+ source##SIZE##2 = _mm_loadu_p##SIZE(s##SIZE##2); \
+ dest##SIZE = _mm_cmp_##SUFFIX(source##SIZE##1, source##SIZE##2, imm); \
+ _mm_storeu_p##SIZE(d##SIZE.x, dest##SIZE); \
+ check##SUFFIX(imm, "" #imm "");
+
+static void
+sse2_test ()
+{
+ __m128d sourced1, sourced2, destd;
+ __m128 sources1, sources2, dests;
+ int i;
+
+ CMP(_CMP_EQ_OQ, !isunordered(sd1[i], sd2[i]) && sd1[i] == sd2[i], d, 2, pd);
+ CMP(_CMP_LT_OS, !isunordered(sd1[i], sd2[i]) && sd1[i] < sd2[i], d, 2, pd);
+ CMP(_CMP_LE_OS, !isunordered(sd1[i], sd2[i]) && sd1[i] <= sd2[i], d, 2, pd);
+ CMP(_CMP_UNORD_Q, isunordered(sd1[i], sd2[i]), d, 2, pd);
+ CMP(_CMP_NEQ_UQ, isunordered(sd1[i], sd2[i]) || sd1[i] != sd2[i], d, 2, pd);
+ CMP(_CMP_NLT_US, isunordered(sd1[i], sd2[i]) || sd1[i] >= sd2[i], d, 2, pd);
+ CMP(_CMP_NLE_US, isunordered(sd1[i], sd2[i]) || sd1[i] > sd2[i], d, 2, pd);
+ CMP(_CMP_ORD_Q, !isunordered(sd1[i], sd2[i]), d, 2, pd);
+
+ CMP(_CMP_EQ_OQ, !isunordered(ss1[i], ss2[i]) && ss1[i] == ss2[i], s, 4, ps);
+ CMP(_CMP_LT_OS, !isunordered(ss1[i], ss2[i]) && ss1[i] < ss2[i], s, 4, ps);
+ CMP(_CMP_LE_OS, !isunordered(ss1[i], ss2[i]) && ss1[i] <= ss2[i], s, 4, ps);
+ CMP(_CMP_UNORD_Q, isunordered(ss1[i], ss2[i]), s, 4, ps);
+ CMP(_CMP_NEQ_UQ, isunordered(ss1[i], ss2[i]) || ss1[i] != ss2[i], s, 4, ps);
+ CMP(_CMP_NLT_US, isunordered(ss1[i], ss2[i]) || ss1[i] >= ss2[i], s, 4, ps);
+ CMP(_CMP_NLE_US, isunordered(ss1[i], ss2[i]) || ss1[i] > ss2[i], s, 4, ps);
+ CMP(_CMP_ORD_Q, !isunordered(ss1[i], ss2[i]), s, 4, ps);
+
+ dd.x[1] = ed.x[1] = sd1[1];
+
+ CMP(_CMP_EQ_OQ, !isunordered(sd1[0], sd2[0]) && sd1[0] == sd2[0], d, 1, sd);
+ CMP(_CMP_LT_OS, !isunordered(sd1[0], sd2[0]) && sd1[0] < sd2[0], d, 1, sd);
+ CMP(_CMP_LE_OS, !isunordered(sd1[0], sd2[0]) && sd1[0] <= sd2[0], d, 1, sd);
+ CMP(_CMP_UNORD_Q, isunordered(sd1[0], sd2[0]), d, 1, sd);
+ CMP(_CMP_NEQ_UQ, isunordered(sd1[0], sd2[0]) || sd1[0] != sd2[0], d, 1, sd);
+ CMP(_CMP_NLT_US, isunordered(sd1[0], sd2[0]) || sd1[0] >= sd2[0], d, 1, sd);
+ CMP(_CMP_NLE_US, isunordered(sd1[0], sd2[0]) || sd1[0] > sd2[0], d, 1, sd);
+ CMP(_CMP_ORD_Q, !isunordered(sd1[0], sd2[0]), d, 1, sd);
+
+ for(i = 1; i < 4; i++) ds.x[i] = es.x[i] = ss1[i];
+
+ CMP(_CMP_EQ_OQ, !isunordered(ss1[0], ss2[0]) && ss1[0] == ss2[0], s, 1, ss);
+ CMP(_CMP_LT_OS, !isunordered(ss1[0], ss2[0]) && ss1[0] < ss2[0], s, 1, ss);
+ CMP(_CMP_LE_OS, !isunordered(ss1[0], ss2[0]) && ss1[0] <= ss2[0], s, 1, ss);
+ CMP(_CMP_UNORD_Q, isunordered(ss1[0], ss2[0]), s, 1, ss);
+ CMP(_CMP_NEQ_UQ, isunordered(ss1[0], ss2[0]) || ss1[0] != ss2[0], s, 1, ss);
+ CMP(_CMP_NLT_US, isunordered(ss1[0], ss2[0]) || ss1[0] >= ss2[0], s, 1, ss);
+ CMP(_CMP_NLE_US, isunordered(ss1[0], ss2[0]) || ss1[0] > ss2[0], s, 1, ss);
+ CMP(_CMP_ORD_Q, !isunordered(ss1[0], ss2[0]), s, 1, ss);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-cmp-error.c b/gcc/testsuite/gcc.target/i386/sse-cmp-error.c
new file mode 100644
index 0000000..fdf2463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-cmp-error.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -msse2 -mno-avx" } */
+
+#include <x86intrin.h>
+
+volatile __m128 a1, a2, a3;
+volatile __m128d d1, d2, d3;
+
+void
+test (void)
+{
+ d1 = _mm_cmp_sd (d2, d3, 8); /* { dg-error "'__builtin_ia32_cmpsd' needs isa option -mavx" } */
+ a1 = _mm_cmp_ss (a2, a3, 8); /* { dg-error "'__builtin_ia32_cmpss' needs isa option -mavx" } */
+ d1 = _mm_cmp_pd (d2, d3, 8); /* { dg-error "'__builtin_ia32_cmppd' needs isa option -mavx" } */
+ a1 = _mm_cmp_ps (a2, a3, 8); /* { dg-error "'__builtin_ia32_cmpps' needs isa option -mavx" } */
+}