aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog14
-rw-r--r--gcc/config/i386/avx2intrin.h12
-rw-r--r--gcc/config/i386/avx512fintrin.h706
-rw-r--r--gcc/config/i386/avxintrin.h21
-rw-r--r--gcc/config/i386/emmintrin.h16
-rw-r--r--gcc/config/i386/xmmintrin.h8
6 files changed, 448 insertions, 329 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a0e21f2..4f094ae 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2014-03-24 Ulrich Drepper <drepper@gmail.com>
+
+ * config/i386/avxintrin.h (_mm256_undefined_si256): Define.
+ (_mm256_undefined_ps): Define.
+ (_mm256_undefined_pd): Define.
+ * config/i386/emmintrin.h (_mm_undefined_si128): Define.
+ (_mm_undefined_pd): Define.
+ * config/i386/xmmintrin.h (_mm_undefined_ps): Define.
+ * config/i386/avx512fintrin.h (_mm512_undefined_si512): Define.
+ (_mm512_undefined_ps): Define.
+ (_mm512_undefined_pd): Define.
+ Use _mm*_undefined_*.
+ * config/i386/avx2intrin.h: Use _mm*_undefined_*.
+
2014-03-24 Alex Velenko <Alex.Velenko@arm.com>
* config/aarch64/aarch64-simd-builtins.def (lshr): DI mode excluded.
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
index 33b12e1..d04c972 100644
--- a/gcc/config/i386/avx2intrin.h
+++ b/gcc/config/i386/avx2intrin.h
@@ -1233,10 +1233,10 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_pd (double const *base, __m128i index, const int scale)
{
- __v2df src = _mm_setzero_pd ();
- __v2df mask = _mm_cmpeq_pd (src, src);
+ __v2df zero = _mm_setzero_pd ();
+ __v2df mask = _mm_cmpeq_pd (zero, zero);
- return (__m128d) __builtin_ia32_gathersiv2df (src,
+ return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (),
base,
(__v4si)index,
mask,
@@ -1259,10 +1259,10 @@ extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
{
- __v4df src = _mm256_setzero_pd ();
- __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
+ __v4df zero = _mm256_setzero_pd ();
+ __v4df mask = _mm256_cmp_pd (zero, zero, _CMP_EQ_OQ);
- return (__m256d) __builtin_ia32_gathersiv4df (src,
+ return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (),
base,
(__v4si)index,
mask,
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 422e83b..20b3baf 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -108,6 +108,30 @@ _mm512_set_ps (float __A, float __B, float __C, float __D,
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_ps (void)
+{
+ __m512 __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_pd (void)
+{
+ __m512d __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_undefined_si512 (void)
+{
+ __m512i __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
@@ -389,7 +413,7 @@ _mm512_mullo_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -420,7 +444,7 @@ _mm512_sllv_epi32 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -452,7 +476,7 @@ _mm512_srav_epi32 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -484,7 +508,7 @@ _mm512_srlv_epi32 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -516,7 +540,7 @@ _mm512_add_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -548,7 +572,7 @@ _mm512_sub_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_pd (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -580,7 +604,7 @@ _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
- _mm512_setzero_pd (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -612,7 +636,7 @@ _mm512_srav_epi64 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -644,7 +668,7 @@ _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -676,7 +700,7 @@ _mm512_add_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -708,7 +732,7 @@ _mm512_mul_epi32 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -739,7 +763,7 @@ _mm512_sub_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -771,7 +795,7 @@ _mm512_mul_epu32 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -802,7 +826,7 @@ _mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -828,7 +852,7 @@ _mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
#else
#define _mm512_slli_epi64(X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_slli_epi64(W, U, X, C) \
@@ -849,7 +873,7 @@ _mm512_sll_epi64 (__m512i __A, __m128i __B)
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -881,7 +905,7 @@ _mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -907,7 +931,7 @@ _mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
#else
#define _mm512_srli_epi64(X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_srli_epi64(W, U, X, C) \
@@ -928,7 +952,7 @@ _mm512_srl_epi64 (__m512i __A, __m128i __B)
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -960,7 +984,7 @@ _mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -986,7 +1010,7 @@ _mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
#else
#define _mm512_srai_epi64(X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_srai_epi64(W, U, X, C) \
@@ -1007,7 +1031,7 @@ _mm512_sra_epi64 (__m512i __A, __m128i __B)
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1039,7 +1063,7 @@ _mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1065,7 +1089,7 @@ _mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
#else
#define _mm512_slli_epi32(X, C) \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_slli_epi32(W, U, X, C) \
@@ -1086,7 +1110,7 @@ _mm512_sll_epi32 (__m512i __A, __m128i __B)
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1118,7 +1142,7 @@ _mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1144,7 +1168,7 @@ _mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
#else
#define _mm512_srli_epi32(X, C) \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_srli_epi32(W, U, X, C) \
@@ -1165,7 +1189,7 @@ _mm512_srl_epi32 (__m512i __A, __m128i __B)
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1197,7 +1221,7 @@ _mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1223,7 +1247,7 @@ _mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
#else
#define _mm512_srai_epi32(X, C) \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_srai_epi32(W, U, X, C) \
@@ -1244,7 +1268,7 @@ _mm512_sra_epi32 (__m512i __A, __m128i __B)
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1414,7 +1438,7 @@ _mm512_rcp14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -1443,7 +1467,7 @@ _mm512_rcp14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -1488,7 +1512,7 @@ _mm512_rsqrt14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -1517,7 +1541,7 @@ _mm512_rsqrt14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -1563,7 +1587,7 @@ _mm512_sqrt_round_pd (__m512d __A, const int __R)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -1593,7 +1617,7 @@ _mm512_sqrt_round_ps (__m512 __A, const int __R)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -1635,7 +1659,7 @@ _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
}
#else
#define _mm512_sqrt_round_pd(A, C) \
- (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
(__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
@@ -1644,7 +1668,7 @@ _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
(__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_sqrt_round_ps(A, C) \
- (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
(__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
@@ -1665,7 +1689,7 @@ _mm512_cvtepi8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1694,7 +1718,7 @@ _mm512_cvtepi8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1723,7 +1747,7 @@ _mm512_cvtepi16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1752,7 +1776,7 @@ _mm512_cvtepi16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1781,7 +1805,7 @@ _mm512_cvtepi32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1810,7 +1834,7 @@ _mm512_cvtepu8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1839,7 +1863,7 @@ _mm512_cvtepu8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1868,7 +1892,7 @@ _mm512_cvtepu16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -1897,7 +1921,7 @@ _mm512_cvtepu16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1926,7 +1950,7 @@ _mm512_cvtepu32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -1957,7 +1981,7 @@ _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -1991,7 +2015,7 @@ _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2024,7 +2048,7 @@ _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2058,7 +2082,7 @@ _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2085,7 +2109,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
}
#else
#define _mm512_add_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
@@ -2094,7 +2118,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_add_round_ps(A, B, C) \
- (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
@@ -2103,7 +2127,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_sub_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
@@ -2112,7 +2136,7 @@ _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_sub_round_ps(A, B, C) \
- (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
@@ -2129,7 +2153,7 @@ _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2163,7 +2187,7 @@ _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2196,7 +2220,7 @@ _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2230,7 +2254,7 @@ _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2294,7 +2318,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
#else
#define _mm512_mul_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
@@ -2303,7 +2327,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_mul_round_ps(A, B, C) \
- (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
@@ -2312,7 +2336,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
(__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_div_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
@@ -2321,7 +2345,7 @@ _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
(__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_div_round_ps(A, B, C) \
- (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
@@ -2350,7 +2374,7 @@ _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2384,7 +2408,7 @@ _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2417,7 +2441,7 @@ _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2451,7 +2475,7 @@ _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2478,7 +2502,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
}
#else
#define _mm512_max_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
+ (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
@@ -2487,7 +2511,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
#define _mm512_max_round_ps(A, B, R) \
- (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_pd(), -1, R)
+ (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
@@ -2496,7 +2520,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
#define _mm512_min_round_pd(A, B, R) \
- (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, R)
+ (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
@@ -2505,7 +2529,7 @@ _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
(__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
#define _mm512_min_round_ps(A, B, R) \
- (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, R)
+ (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
@@ -2522,7 +2546,7 @@ _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -2556,7 +2580,7 @@ _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -2602,7 +2626,7 @@ _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
}
#else
#define _mm512_scalef_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), -1, C)
+ (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
@@ -2611,7 +2635,7 @@ _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_scalef_round_ps(A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), -1, C)
+ (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
@@ -3294,7 +3318,7 @@ _mm512_abs_epi64 (__m512i __A)
{
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3323,7 +3347,7 @@ _mm512_abs_epi32 (__m512i __A)
{
return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3350,8 +3374,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastss_ps (__m128 __A)
{
- __v16sf __O;
- return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, __O,
+ return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -3377,8 +3402,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastsd_pd (__m128d __A)
{
- __v8df __O;
- return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, __O,
+ return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
+ (__v8df)
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -3404,8 +3430,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastd_epi32 (__m128i __A)
{
- __v16si __O;
- return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3431,8 +3458,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi32 (int __A)
{
- __v16si __O;
- return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
+ (__v16si)
+ _mm512_undefined_si512 (),
(__mmask16)(-1));
}
@@ -3458,8 +3486,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastq_epi64 (__m128i __A)
{
- __v8di __O;
- return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3485,12 +3514,15 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi64 (long long __A)
{
- __v8di __O;
#ifdef TARGET_64BIT
- return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8)(-1));
#else
- return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, __O,
+ return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8)(-1));
#endif
}
@@ -3529,8 +3561,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f32x4 (__m128 __A)
{
- __v16sf __O;
- return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, __O,
+ return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
+ (__v16sf)
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -3557,9 +3590,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i32x4 (__m128i __A)
{
- __v16si __O;
return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
- __O,
+ (__v16si)
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3586,9 +3619,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x4 (__m256d __A)
{
- __v8df __O;
return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
- __O,
+ (__v8df)
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -3615,9 +3648,9 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_i64x4 (__m256i __A)
{
- __v8di __O;
return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
- __O,
+ (__v8di)
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3738,7 +3771,7 @@ _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
__mask,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3771,7 +3804,7 @@ _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
(__v8di) __B, __imm,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -3806,7 +3839,7 @@ _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
(__v16si) __B,
__imm,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -3842,7 +3875,7 @@ _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
(__v8df) __B, __imm,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -3876,7 +3909,7 @@ _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
(__v16sf) __B, __imm,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -3906,7 +3939,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
#else
#define _mm512_shuffle_epi32(X, C) \
((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_shuffle_epi32(W, U, X, C) \
@@ -3922,7 +3955,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
#define _mm512_shuffle_i64x2(X, Y, C) \
((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)_mm512_undefined_si512 (),\
(__mmask8)-1))
#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
@@ -3940,7 +3973,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
#define _mm512_shuffle_i32x4(X, Y, C) \
((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
(__v16si)(__m512i)(Y), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
@@ -3958,7 +3991,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
#define _mm512_shuffle_f64x2(X, Y, C) \
((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(C),\
- (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
(__mmask8)-1))
#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
@@ -3976,7 +4009,7 @@ _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
#define _mm512_shuffle_f32x4(X, Y, C) \
((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(C),\
- (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__v16sf)(__m512)_mm512_undefined_ps(),\
(__mmask16)-1))
#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
@@ -3999,7 +4032,7 @@ _mm512_rolv_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -4031,7 +4064,7 @@ _mm512_rorv_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -4063,7 +4096,7 @@ _mm512_rolv_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -4095,7 +4128,7 @@ _mm512_rorv_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -4127,7 +4160,7 @@ _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
{
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4157,7 +4190,7 @@ _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
{
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4182,7 +4215,7 @@ _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
}
#else
#define _mm512_cvtt_roundpd_epi32(A, B) \
- ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
@@ -4191,7 +4224,7 @@ _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#define _mm512_cvtt_roundpd_epu32(A, B) \
- ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
@@ -4207,7 +4240,7 @@ _mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4237,7 +4270,7 @@ _mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1, __R);
}
@@ -4262,7 +4295,7 @@ _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
}
#else
#define _mm512_cvt_roundpd_epi32(A, B) \
- ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B))
@@ -4271,7 +4304,7 @@ _mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
#define _mm512_cvt_roundpd_epu32(A, B) \
- ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), -1, B))
+ ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B))
@@ -4287,7 +4320,7 @@ _mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4317,7 +4350,7 @@ _mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4342,7 +4375,7 @@ _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
}
#else
#define _mm512_cvtt_roundps_epi32(A, B) \
- ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B))
@@ -4351,7 +4384,7 @@ _mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#define _mm512_cvtt_roundps_epu32(A, B) \
- ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B))
@@ -4367,7 +4400,7 @@ _mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4397,7 +4430,7 @@ _mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1, __R);
}
@@ -4422,7 +4455,7 @@ _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
}
#else
#define _mm512_cvt_roundps_epi32(A, B) \
- ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B))
@@ -4431,7 +4464,7 @@ _mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B))
#define _mm512_cvt_roundps_epu32(A, B) \
- ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), -1, B))
+ ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B))
#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B))
@@ -4553,8 +4586,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, __O,
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask16) -1);
}
@@ -4587,8 +4621,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi32_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, __O,
+ return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask16) -1);
}
@@ -4621,8 +4656,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi32_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, __O,
+ return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask16) -1);
}
@@ -4656,8 +4692,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi16 (__m512i __A)
{
- __v16hi __O;
- return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, __O,
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
(__mmask16) -1);
}
@@ -4690,8 +4727,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi32_epi16 (__m512i __A)
{
- __v16hi __O;
- return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, __O,
+ return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
(__mmask16) -1);
}
@@ -4724,8 +4762,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi32_epi16 (__m512i __A)
{
- __v16hi __O;
- return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, __O,
+ return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
(__mmask16) -1);
}
@@ -4759,8 +4798,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi32 (__m512i __A)
{
- __v8si __O;
- return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, __O,
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
(__mmask8) -1);
}
@@ -4794,7 +4834,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi32 (__m512i __A)
{
- __v8si __O;
- return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, __O,
+ return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
 (__mmask8) -1);
}
@@ -4827,8 +4869,9 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi32 (__m512i __A)
{
- __v8si __O;
- return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, __O,
+ return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
(__mmask8) -1);
}
@@ -4861,8 +4904,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi16 (__m512i __A)
{
- __v8hi __O;
- return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4895,8 +4939,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi16 (__m512i __A)
{
- __v8hi __O;
- return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4929,8 +4974,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi16 (__m512i __A)
{
- __v8hi __O;
- return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4963,8 +5009,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi64_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -4997,8 +5044,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtsepi64_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -5031,8 +5079,9 @@ extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtusepi64_epi8 (__m512i __A)
{
- __v16qi __O;
- return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, __O,
+ return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
+ (__v16qi)
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -5068,7 +5117,7 @@ _mm512_cvtepi32_pd (__m256i __A)
{
return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5097,7 +5146,7 @@ _mm512_cvtepu32_pd (__m256i __A)
{
return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5127,7 +5176,7 @@ _mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -5157,7 +5206,7 @@ _mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -5183,7 +5232,7 @@ _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
#else
#define _mm512_cvt_roundepi32_ps(A, B) \
- (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
+ (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
(__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B)
@@ -5192,7 +5241,7 @@ _mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
(__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B)
#define _mm512_cvt_roundepu32_ps(A, B) \
- (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
+ (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
(__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B)
@@ -5209,7 +5258,7 @@ _mm512_extractf64x4_pd (__m512d __A, const int __imm)
return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A,
__imm,
(__v4df)
- _mm256_setzero_pd (),
+ _mm256_undefined_pd (),
(__mmask8) -1);
}
@@ -5242,7 +5291,7 @@ _mm512_extractf32x4_ps (__m512 __A, const int __imm)
return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A,
__imm,
(__v4sf)
- _mm_setzero_ps (),
+ _mm_undefined_ps (),
(__mmask8) -1);
}
@@ -5275,7 +5324,7 @@ _mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A,
__imm,
(__v4di)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1);
}
@@ -5308,7 +5357,7 @@ _mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A,
__imm,
(__v4si)
- _mm_setzero_si128 (),
+ _mm_undefined_si128 (),
(__mmask8) -1);
}
@@ -5338,7 +5387,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
#define _mm512_extractf64x4_pd(X, C) \
((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
(int) (C),\
- (__v4df)(__m256d)_mm256_setzero_pd(),\
+ (__v4df)(__m256d)_mm256_undefined_pd(),\
(__mmask8)-1))
#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
@@ -5356,7 +5405,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
#define _mm512_extractf32x4_ps(X, C) \
((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
(int) (C),\
- (__v4sf)(__m128)_mm_setzero_ps(),\
+ (__v4sf)(__m128)_mm_undefined_ps(),\
(__mmask8)-1))
#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
@@ -5374,7 +5423,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
#define _mm512_extracti64x4_epi64(X, C) \
((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
(int) (C),\
- (__v4di)(__m256i)_mm256_setzero_si256 (),\
+ (__v4di)(__m256i)_mm256_undefined_si256 (),\
(__mmask8)-1))
#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
@@ -5392,7 +5441,7 @@ _mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
#define _mm512_extracti32x4_epi32(X, C) \
((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
(int) (C),\
- (__v4si)(__m128i)_mm_setzero_si128 (),\
+ (__v4si)(__m128i)_mm_undefined_si128 (),\
(__mmask8)-1))
#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
@@ -5437,7 +5486,7 @@ _mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
(__v4di) __B,
__imm,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -5474,7 +5523,7 @@ _mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
(__v4df) __B,
__imm,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5514,7 +5563,7 @@ _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
#define _mm512_insertf64x4(X, Y, C) \
((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
(__v4df)(__m256d) (Y), (int) (C), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)(__m512d)_mm512_undefined_pd(), \
(__mmask8)-1))
#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
@@ -5532,7 +5581,7 @@ _mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
#define _mm512_inserti64x4(X, Y, C) \
((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
(__v4di)(__m256i) (Y), (int) (C), \
- (__v8di)(__m512i)_mm512_setzero_si512 (), \
+ (__v8di)(__m512i)_mm512_undefined_si512 (), \
(__mmask8)-1))
#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
@@ -5554,7 +5603,7 @@ _mm512_loadu_pd (void const *__P)
{
return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5599,7 +5648,7 @@ _mm512_loadu_ps (void const *__P)
{
return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -5717,7 +5766,7 @@ _mm512_permutevar_pd (__m512d __A, __m512i __C)
return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
(__v8di) __C,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5749,7 +5798,7 @@ _mm512_permutevar_ps (__m512 __A, __m512i __C)
return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
(__v16si) __C,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -5968,7 +6017,7 @@ _mm512_permute_pd (__m512d __X, const int __C)
{
return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -5997,7 +6046,7 @@ _mm512_permute_ps (__m512 __X, const int __C)
{
return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6022,7 +6071,7 @@ _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
#else
#define _mm512_permute_pd(X, C) \
((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
- (__v8df)(__m512d)(X), \
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
(__mmask8)(-1)))
#define _mm512_mask_permute_pd(W, U, X, C) \
@@ -6037,7 +6086,7 @@ _mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
#define _mm512_permute_ps(X, C) \
((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
- (__v16sf)(__m512)(X), \
+ (__v16sf)(__m512)_mm512_undefined_ps(),\
(__mmask16)(-1)))
#define _mm512_mask_permute_ps(W, U, X, C) \
@@ -6058,7 +6107,7 @@ _mm512_permutex_epi64 (__m512i __X, const int __I)
{
return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) (-1));
}
@@ -6088,7 +6137,7 @@ _mm512_permutex_pd (__m512d __X, const int __M)
{
return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -6113,7 +6162,8 @@ _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
#else
#define _mm512_permutex_pd(X, M) \
((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
- (__v8df)(__m512d)(X), (__mmask8)-1))
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
+ (__mmask8)-1))
#define _mm512_mask_permutex_pd(W, U, X, M) \
((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
@@ -6127,7 +6177,8 @@ _mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
#define _mm512_permutex_epi64(X, I) \
((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
(int)(I), \
- (__v8di)(__m512i)(X), \
+ (__v8di)(__m512i) \
+ (_mm512_undefined_si512 ()),\
(__mmask8)(-1)))
#define _mm512_maskz_permutex_epi64(M, X, I) \
@@ -6162,7 +6213,7 @@ _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
(__v8di) __X,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6195,7 +6246,7 @@ _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
(__v16si) __X,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6217,7 +6268,7 @@ _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
(__v8di) __X,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -6249,7 +6300,7 @@ _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
(__v16si) __X,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6282,7 +6333,7 @@ _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
(__v16sf) __V, __imm,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6315,7 +6366,7 @@ _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
(__v8df) __V, __imm,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -6485,7 +6536,7 @@ _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
#define _mm512_shuffle_pd(X, Y, C) \
((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(C),\
- (__v8df)(__m512d)_mm512_setzero_pd(),\
+ (__v8df)(__m512d)_mm512_undefined_pd(),\
(__mmask8)-1))
#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
@@ -6503,7 +6554,7 @@ _mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
#define _mm512_shuffle_ps(X, Y, C) \
((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(C),\
- (__v16sf)(__m512)_mm512_setzero_ps(),\
+ (__v16sf)(__m512)_mm512_undefined_ps(),\
(__mmask16)-1))
#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
@@ -6585,7 +6636,7 @@ _mm512_movehdup_ps (__m512 __A)
{
return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6614,7 +6665,7 @@ _mm512_moveldup_ps (__m512 __A)
{
return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -6644,7 +6695,7 @@ _mm512_or_si512 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6655,7 +6706,7 @@ _mm512_or_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6687,7 +6738,7 @@ _mm512_or_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6719,7 +6770,7 @@ _mm512_xor_si512 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6730,7 +6781,7 @@ _mm512_xor_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6762,7 +6813,7 @@ _mm512_xor_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6794,7 +6845,7 @@ _mm512_rol_epi32 (__m512i __A, const int __B)
{
return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6823,7 +6874,7 @@ _mm512_ror_epi32 (__m512i __A, int __B)
{
return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6852,7 +6903,7 @@ _mm512_rol_epi64 (__m512i __A, const int __B)
{
return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6881,7 +6932,7 @@ _mm512_ror_epi64 (__m512i __A, int __B)
{
return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -6908,7 +6959,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
#define _mm512_rol_epi32(A, B) \
((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
(int)(B), \
- (__v16si)_mm512_setzero_si512 (), \
+ (__v16si)_mm512_undefined_si512 (), \
(__mmask16)(-1)))
#define _mm512_mask_rol_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
@@ -6923,7 +6974,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
#define _mm512_ror_epi32(A, B) \
((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
(int)(B), \
- (__v16si)_mm512_setzero_si512 (), \
+ (__v16si)_mm512_undefined_si512 (), \
(__mmask16)(-1)))
#define _mm512_mask_ror_epi32(W, U, A, B) \
((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
@@ -6938,7 +6989,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
#define _mm512_rol_epi64(A, B) \
((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
(int)(B), \
- (__v8di)_mm512_setzero_si512 (), \
+ (__v8di)_mm512_undefined_si512 (), \
(__mmask8)(-1)))
#define _mm512_mask_rol_epi64(W, U, A, B) \
((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
@@ -6954,7 +7005,7 @@ _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
#define _mm512_ror_epi64(A, B) \
((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
(int)(B), \
- (__v8di)_mm512_setzero_si512 (), \
+ (__v8di)_mm512_undefined_si512 (), \
(__mmask8)(-1)))
#define _mm512_mask_ror_epi64(W, U, A, B) \
((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
@@ -6975,7 +7026,7 @@ _mm512_and_si512 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -6986,7 +7037,7 @@ _mm512_and_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7018,7 +7069,7 @@ _mm512_and_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7049,7 +7100,7 @@ _mm512_andnot_si512 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7060,7 +7111,7 @@ _mm512_andnot_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7092,7 +7143,7 @@ _mm512_andnot_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7190,7 +7241,7 @@ _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7223,7 +7274,7 @@ _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7255,7 +7306,7 @@ _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -7288,7 +7339,7 @@ _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -7571,7 +7622,7 @@ _mm512_movedup_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -7601,7 +7652,7 @@ _mm512_unpacklo_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -7633,7 +7684,7 @@ _mm512_unpackhi_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1);
}
@@ -7665,7 +7716,7 @@ _mm512_unpackhi_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -7697,7 +7748,7 @@ _mm512_cvt_roundps_pd (__m256 __A, const int __R)
{
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -7727,7 +7778,7 @@ _mm512_cvt_roundph_ps (__m256i __A, const int __R)
{
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -7758,7 +7809,7 @@ _mm512_cvt_roundps_ph (__m512 __A, const int __I)
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
__I,
(__v16hi)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
-1);
}
@@ -7769,7 +7820,7 @@ _mm512_cvtps_ph (__m512 __A, const int __I)
return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
__I,
(__v16hi)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
-1);
}
@@ -7817,7 +7868,7 @@ _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
}
#else
#define _mm512_cvt_roundps_pd(A, B) \
- (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), -1, B)
+ (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)
#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
(__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)
@@ -7826,7 +7877,7 @@ _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
(__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)
#define _mm512_cvt_roundph_ps(A, B) \
- (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), -1, B)
+ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
(__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)
@@ -7836,10 +7887,10 @@ _mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
#define _mm512_cvt_roundps_ph(A, I) \
((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
- (__v16hi)_mm256_setzero_si256 (), -1))
+ (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I) \
((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
- (__v16hi)_mm256_setzero_si256 (), -1))
+ (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\
(__v16hi)(__m256i)(U), (__mmask16) (W)))
@@ -7861,7 +7912,7 @@ _mm512_cvt_roundpd_ps (__m512d __A, const int __R)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
(__v8sf)
- _mm256_setzero_ps (),
+ _mm256_undefined_ps (),
(__mmask8) -1, __R);
}
@@ -7904,7 +7955,7 @@ _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
}
#else
#define _mm512_cvt_roundpd_ps(A, B) \
- (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), -1, B)
+ (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)
#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
(__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)
@@ -7972,7 +8023,7 @@ _mm512_getexp_round_ps (__m512 __A, const int __R)
{
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -8002,7 +8053,7 @@ _mm512_getexp_round_pd (__m512d __A, const int __R)
{
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -8049,7 +8100,7 @@ _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
{
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
(__C << 2) | __B,
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1, __R);
}
@@ -8085,7 +8136,7 @@ _mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
{
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
(__C << 2) | __B,
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1, __R);
}
@@ -8142,7 +8193,7 @@ _mm_getmant_round_ss (__m128 __A, __m128 __B,
#define _mm512_getmant_round_pd(X, B, C, R) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
(int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)(__m512d)_mm512_undefined_pd(), \
(__mmask8)-1,\
(R)))
@@ -8162,7 +8213,7 @@ _mm_getmant_round_ss (__m128 __A, __m128 __B,
#define _mm512_getmant_round_ps(X, B, C, R) \
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
(int)(((C)<<2) | (B)), \
- (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__v16sf)(__m512)_mm512_undefined_ps(), \
(__mmask16)-1,\
(R)))
@@ -8199,7 +8250,7 @@ _mm_getmant_round_ss (__m128 __A, __m128 __B,
#define _mm512_getexp_round_ps(A, R) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, R))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))
#define _mm512_mask_getexp_round_ps(W, U, A, R) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
@@ -8211,7 +8262,7 @@ _mm_getmant_round_ss (__m128 __A, __m128 __B,
#define _mm512_getexp_round_pd(A, R) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)-1, R))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))
#define _mm512_mask_getexp_round_pd(W, U, A, R) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
@@ -8228,7 +8279,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
{
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
- (__v16sf) __A, -1, __R);
+ (__v16sf)
+ _mm512_undefined_ps (),
+ -1, __R);
}
extern __inline __m512
@@ -8258,7 +8311,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
{
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
- (__v8df) __A, -1, __R);
+ (__v8df)
+ _mm512_undefined_pd (),
+ -1, __R);
}
extern __inline __m512d
@@ -8303,7 +8358,7 @@ _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
#else
#define _mm512_roundscale_round_ps(A, B, R) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
- (__v16sf)(__m512)(A), (__mmask16)(-1), R))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
(int)(D), \
@@ -8316,7 +8371,7 @@ _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
(__mmask16)(A), R))
#define _mm512_roundscale_round_pd(A, B, R) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
- (__v8df)(__m512d)(A), (__mmask8)(-1), R))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
(int)(D), \
@@ -8423,7 +8478,7 @@ _mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
(__v16si) __B, __imm,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -8457,7 +8512,7 @@ _mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
(__v8di) __B, __imm,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -8486,7 +8541,7 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
#else
#define _mm512_alignr_epi32(X, Y, C) \
((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(X), \
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\
(__mmask16)-1))
#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \
@@ -8496,12 +8551,13 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
#define _mm512_maskz_alignr_epi32(U, X, Y, C) \
((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \
- (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)_mm512_setzero_si512 (),\
+ (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#define _mm512_alignr_epi64(X, Y, C) \
((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
- (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(X), (__mmask8)-1))
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \
+ (__mmask8)-1))
#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \
((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
@@ -8509,7 +8565,7 @@ _mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
#define _mm512_maskz_alignr_epi64(U, X, Y, C) \
((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \
- (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)_mm512_setzero_si512 (),\
+ (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
@@ -8976,7 +9032,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
{
- __m512 v1_old = _mm512_setzero_ps ();
+ __m512 v1_old = _mm512_undefined_ps ();
__mmask16 mask = 0xFFFF;
return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
@@ -9000,7 +9056,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
{
- __m512d v1_old = _mm512_setzero_pd ();
+ __m512d v1_old = _mm512_undefined_pd ();
__mmask8 mask = 0xFF;
return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
@@ -9024,7 +9080,7 @@ extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
{
- __m256 v1_old = _mm256_setzero_ps ();
+ __m256 v1_old = _mm256_undefined_ps ();
__mmask8 mask = 0xFF;
return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
@@ -9048,7 +9104,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
{
- __m512d v1_old = _mm512_setzero_pd ();
+ __m512d v1_old = _mm512_undefined_pd ();
__mmask8 mask = 0xFF;
return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
@@ -9072,7 +9128,7 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
{
- __m512i v1_old = _mm512_setzero_si512 ();
+ __m512i v1_old = _mm512_undefined_si512 ();
__mmask16 mask = 0xFFFF;
return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
@@ -9096,7 +9152,7 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
{
- __m512i v1_old = _mm512_setzero_si512 ();
+ __m512i v1_old = _mm512_undefined_si512 ();
__mmask8 mask = 0xFF;
return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
@@ -9121,7 +9177,7 @@ extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
{
- __m256i v1_old = _mm256_setzero_si256 ();
+ __m256i v1_old = _mm256_undefined_si256 ();
__mmask8 mask = 0xFF;
return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
@@ -9145,7 +9201,7 @@ extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
{
- __m512i v1_old = _mm512_setzero_si512 ();
+ __m512i v1_old = _mm512_undefined_si512 ();
__mmask8 mask = 0xFF;
return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
@@ -9309,7 +9365,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
}
#else
#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
- (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_setzero_ps(), \
+ (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
(float const *)ADDR, \
(__v16si)(__m512i)INDEX, \
(__mmask16)0xFFFF, (int)SCALE)
@@ -9321,7 +9377,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
(__mmask16)MASK, (int)SCALE)
#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
- (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_setzero_pd(), \
+ (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
(double const *)ADDR, \
(__v8si)(__m256i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9333,7 +9389,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
- (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_setzero_ps(), \
+ (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
(float const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9345,7 +9401,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
- (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_setzero_pd(), \
+ (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
(double const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9357,7 +9413,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
(__mmask8)MASK, (int)SCALE)
#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
- (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_setzero_si512 (), \
+ (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \
(int const *)ADDR, \
(__v16si)(__m512i)INDEX, \
(__mmask16)0xFFFF, (int)SCALE)
@@ -9369,7 +9425,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
(__mmask16)MASK, (int)SCALE)
#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
- (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_setzero_si512 (), \
+ (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \
(long long const *)ADDR, \
(__v8si)(__m256i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9381,7 +9437,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
- (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_setzero_si256(), \
+ (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
(int const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9393,7 +9449,7 @@ _mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask,
(__mmask8)MASK, (int)SCALE)
#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
- (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_setzero_si512 (), \
+ (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \
(long long const *)ADDR, \
(__v8di)(__m512i)INDEX, \
(__mmask8)0xFF, (int)SCALE)
@@ -9889,7 +9945,7 @@ _mm512_max_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -9920,7 +9976,7 @@ _mm512_min_epi64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -9951,7 +10007,7 @@ _mm512_max_epu64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -9982,7 +10038,7 @@ _mm512_min_epu64 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask8) -1);
}
@@ -10013,7 +10069,7 @@ _mm512_max_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10044,7 +10100,7 @@ _mm512_min_epi32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10075,7 +10131,7 @@ _mm512_max_epu32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10106,7 +10162,7 @@ _mm512_min_epu32 (__m512i __A, __m512i __B)
return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1);
}
@@ -10137,7 +10193,7 @@ _mm512_unpacklo_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1);
}
@@ -10382,7 +10438,7 @@ _mm512_sqrt_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10414,7 +10470,7 @@ _mm512_sqrt_ps (__m512 __A)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10447,7 +10503,7 @@ _mm512_add_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10482,7 +10538,7 @@ _mm512_add_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10517,7 +10573,7 @@ _mm512_sub_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10552,7 +10608,7 @@ _mm512_sub_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10587,7 +10643,7 @@ _mm512_mul_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10622,7 +10678,7 @@ _mm512_mul_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10657,7 +10713,7 @@ _mm512_div_pd (__m512d __M, __m512d __V)
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10692,7 +10748,7 @@ _mm512_div_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10727,7 +10783,7 @@ _mm512_max_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10762,7 +10818,7 @@ _mm512_max_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10797,7 +10853,7 @@ _mm512_min_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10832,7 +10888,7 @@ _mm512_min_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10867,7 +10923,7 @@ _mm512_scalef_pd (__m512d __A, __m512d __B)
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -10902,7 +10958,7 @@ _mm512_scalef_ps (__m512 __A, __m512 __B)
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11482,7 +11538,7 @@ _mm512_cvttpd_epi32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11514,7 +11570,7 @@ _mm512_cvttpd_epu32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11546,7 +11602,7 @@ _mm512_cvtpd_epi32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11578,7 +11634,7 @@ _mm512_cvtpd_epu32 (__m512d __A)
{
return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
(__v8si)
- _mm256_setzero_si256 (),
+ _mm256_undefined_si256 (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11610,7 +11666,7 @@ _mm512_cvttps_epi32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11642,7 +11698,7 @@ _mm512_cvttps_epu32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11674,7 +11730,7 @@ _mm512_cvtps_epi32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11706,7 +11762,7 @@ _mm512_cvtps_epu32 (__m512 __A)
{
return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
(__v16si)
- _mm512_setzero_si512 (),
+ _mm512_undefined_si512 (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11764,7 +11820,7 @@ _mm512_cvtepi32_ps (__m512i __A)
{
return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -11796,7 +11852,7 @@ _mm512_cvtepu32_ps (__m512i __A)
{
return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12141,7 +12197,7 @@ _mm512_cvtps_pd (__m256 __A)
{
return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12173,7 +12229,7 @@ _mm512_cvtph_ps (__m256i __A)
{
return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12205,7 +12261,7 @@ _mm512_cvtpd_ps (__m512d __A)
{
return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
(__v8sf)
- _mm256_setzero_ps (),
+ _mm256_undefined_ps (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12238,7 +12294,7 @@ _mm512_getexp_ps (__m512 __A)
{
return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
(__v16sf)
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12270,7 +12326,7 @@ _mm512_getexp_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
(__v8df)
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12321,7 +12377,7 @@ _mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
{
return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
(__C << 2) | __B,
- _mm512_setzero_pd (),
+ _mm512_undefined_pd (),
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12357,7 +12413,7 @@ _mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
{
return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
(__C << 2) | __B,
- _mm512_setzero_ps (),
+ _mm512_undefined_ps (),
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12412,7 +12468,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
#define _mm512_getmant_pd(X, B, C) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
(int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)_mm512_undefined_pd(), \
(__mmask8)-1,\
_MM_FROUND_CUR_DIRECTION))
@@ -12426,13 +12482,13 @@ _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
#define _mm512_maskz_getmant_pd(U, X, B, C) \
((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \
(int)(((C)<<2) | (B)), \
- (__v8df)(__m512d)_mm512_setzero_pd(), \
+ (__v8df)_mm512_setzero_pd(), \
(__mmask8)(U),\
_MM_FROUND_CUR_DIRECTION))
#define _mm512_getmant_ps(X, B, C) \
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
(int)(((C)<<2) | (B)), \
- (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__v16sf)_mm512_undefined_ps(), \
(__mmask16)-1,\
_MM_FROUND_CUR_DIRECTION))
@@ -12446,7 +12502,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
#define _mm512_maskz_getmant_ps(U, X, B, C) \
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \
(int)(((C)<<2) | (B)), \
- (__v16sf)(__m512)_mm512_setzero_ps(), \
+ (__v16sf)_mm512_setzero_ps(), \
(__mmask16)(U),\
_MM_FROUND_CUR_DIRECTION))
#define _mm_getmant_sd(X, Y, C, D) \
@@ -12471,7 +12527,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
#define _mm512_getexp_ps(A) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_getexp_ps(W, U, A) \
((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
@@ -12483,7 +12539,7 @@ _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
#define _mm512_getexp_pd(A) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_getexp_pd(W, U, A) \
((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
@@ -12500,7 +12556,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ps (__m512 __A, const int __imm)
{
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
- (__v16sf) __A, -1,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12532,7 +12590,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_pd (__m512d __A, const int __imm)
{
return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
- (__v8df) __A, -1,
+ (__v8df)
+ _mm512_undefined_pd (),
+ -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12580,7 +12640,7 @@ _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
#else
#define _mm512_roundscale_ps(A, B) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
- (__v16sf)(__m512)(A), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
+ (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D) \
((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \
(int)(D), \
@@ -12593,7 +12653,7 @@ _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
(__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_pd(A, B) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
- (__v8df)(__m512d)(A), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
+ (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D) \
((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \
(int)(D), \
diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index f960b76..2ea327c 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -1168,6 +1168,27 @@ _mm256_movemask_ps (__m256 __A)
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_pd (void)
+{
+ __m256d __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_ps (void)
+{
+ __m256 __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_undefined_si256 (void)
+{
+ __m256i __Y = __Y;
+ return __Y;
+}
+
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_setzero_pd (void)
{
return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 };
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 08928fb..a2bdf0e 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -86,6 +86,14 @@ _mm_setr_pd (double __W, double __X)
return __extension__ (__m128d){ __W, __X };
}
+/* Create an undefined vector. */
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_pd (void)
+{
+ __m128d __Y = __Y;
+ return __Y;
+}
+
/* Create a vector of zeros. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_pd (void)
@@ -728,6 +736,14 @@ _mm_move_epi64 (__m128i __A)
return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
}
+/* Create an undefined vector. */
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_si128 (void)
+{
+ __m128i __Y = __Y;
+ return __Y;
+}
+
/* Create a vector of zeros. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si128 (void)
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 0238439..a3824e7 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -102,6 +102,14 @@ typedef float __v4sf __attribute__ ((__vector_size__ (16)));
#define _MM_FLUSH_ZERO_ON 0x8000
#define _MM_FLUSH_ZERO_OFF 0x0000
+/* Create an undefined vector. */
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_undefined_ps (void)
+{
+ __m128 __Y = __Y;
+ return __Y;
+}
+
/* Create a vector of zeros. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_ps (void)