aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2016-08-04 23:14:39 +0200
committer Uros Bizjak <uros@gcc.gnu.org> 2016-08-04 23:14:39 +0200
commit 383321ecc99a881f9f1a8249295d23e717cbdf7e (patch)
tree b25121ec4995070a6ebf7eee91ddccf1b1967214
parent d4f7837c5a8aacaf7c651d66cbd3e1363e15d564 (diff)
download gcc-383321ecc99a881f9f1a8249295d23e717cbdf7e.zip
gcc-383321ecc99a881f9f1a8249295d23e717cbdf7e.tar.gz
gcc-383321ecc99a881f9f1a8249295d23e717cbdf7e.tar.bz2
re PR target/72805 (AVX512: invalid code generation involving masks)
PR target/72805
* config/i386/avx512fintrin.h (_mm512_cmp_epi32_mask) [!__OPTIMIZE__]: Cast builtin function result to __mmask16 instead of __mmask8.
(_mm512_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
(_mm512_mask_cmp_epi32_mask) [!__OPTIMIZE__]: Ditto.
(_mm512_mask_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.

From-SVN: r239152
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/config/i386/avx512fintrin.h 48
2 files changed, 33 insertions, 24 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 025476c..fd6fa78 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2016-08-04 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/72805
+ * config/i386/avx512fintrin.h (_mm512_cmp_epi32_mask) [!__OPTIMIZE__]:
+ Cast builtin function result to __mmask16 instead of __mmask8.
+ (_mm512_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
+ (_mm512_mask_cmp_epi32_mask) [!__OPTIMIZE__]: Ditto.
+ (_mm512_mask_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
+
2016-08-04 David Malcolm <dmalcolm@redhat.com>
* selftest.h (ASSERT_TRUE): Reimplement in terms of...
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 2f51be9..2b30eae 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -9130,9 +9130,9 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
(__mmask8)-1))
#define _mm512_cmp_epi32_mask(X, Y, P) \
- ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(P),\
- (__mmask16)-1))
+ ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P), \
+ (__mmask16)-1))
#define _mm512_cmp_epu64_mask(X, Y, P) \
((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
@@ -9140,66 +9140,66 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
(__mmask8)-1))
#define _mm512_cmp_epu32_mask(X, Y, P) \
- ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(P),\
- (__mmask16)-1))
+ ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P), \
+ (__mmask16)-1))
-#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
+#define _mm512_cmp_round_pd_mask(X, Y, P, R) \
((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(P),\
(__mmask8)-1, R))
-#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
+#define _mm512_cmp_round_ps_mask(X, Y, P, R) \
((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(P),\
(__mmask16)-1, R))
-#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
+#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(P),\
(__mmask8)M))
-#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
- ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(P),\
- (__mmask16)M))
+#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P), \
+ (__mmask16)M))
-#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
+#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(P),\
(__mmask8)M))
-#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
- ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(P),\
- (__mmask16)M))
+#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
+ ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(P), \
+ (__mmask16)M))
-#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
+#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(P),\
(__mmask8)M, R))
-#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
+#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(P),\
(__mmask16)M, R))
-#define _mm_cmp_round_sd_mask(X, Y, P, R) \
+#define _mm_cmp_round_sd_mask(X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (int)(P),\
(__mmask8)-1, R))
-#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
+#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (int)(P),\
(M), R))
-#define _mm_cmp_round_ss_mask(X, Y, P, R) \
+#define _mm_cmp_round_ss_mask(X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (int)(P), \
(__mmask8)-1, R))
-#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
+#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (int)(P), \
(M), R))