diff options
Diffstat (limited to 'clang/lib/Headers')
-rw-r--r-- | clang/lib/Headers/avx2intrin.h | 47 | ||||
-rw-r--r-- | clang/lib/Headers/avx512bwintrin.h | 15 | ||||
-rw-r--r-- | clang/lib/Headers/avx512cdintrin.h | 86 | ||||
-rw-r--r-- | clang/lib/Headers/avx512dqintrin.h | 60 | ||||
-rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 126 | ||||
-rw-r--r-- | clang/lib/Headers/avx512ifmaintrin.h | 63 | ||||
-rw-r--r-- | clang/lib/Headers/avx512ifmavlintrin.h | 92 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlbwintrin.h | 20 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlcdintrin.h | 154 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vldqintrin.h | 50 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 361 | ||||
-rw-r--r-- | clang/lib/Headers/avxifmaintrin.h | 9 | ||||
-rw-r--r-- | clang/lib/Headers/avxintrin.h | 95 | ||||
-rw-r--r-- | clang/lib/Headers/float.h | 9 | ||||
-rw-r--r-- | clang/lib/Headers/pmmintrin.h | 25 | ||||
-rw-r--r-- | clang/lib/Headers/ptrauth.h | 14 | ||||
-rw-r--r-- | clang/lib/Headers/smmintrin.h | 12 | ||||
-rw-r--r-- | clang/lib/Headers/tmmintrin.h | 123 | ||||
-rw-r--r-- | clang/lib/Headers/xmmintrin.h | 5 |
19 files changed, 591 insertions, 775 deletions
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 4aaca2d..fa7f4c2 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -834,10 +834,9 @@ _mm256_cmpgt_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally adds the adjacent pairs of 32-bit integers from two 256-bit @@ -866,10 +865,9 @@ _mm256_hadd_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadd_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadd_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } /// Horizontally adds the adjacent pairs of 16-bit integers from two 256-bit @@ -901,10 +899,9 @@ _mm256_hadd_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hadds_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hadds_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -937,10 +934,9 @@ _mm256_hadds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } /// Horizontally subtracts adjacent pairs of 32-bit integers from two 256-bit @@ -969,10 +965,9 @@ _mm256_hsub_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [8 x i32] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsub_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsub_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } /// Horizontally subtracts adjacent pairs of 16-bit integers from two 256-bit @@ -1005,10 +1000,9 @@ _mm256_hsub_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_hsubs_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_hsubs_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } /// Multiplies each unsigned byte from the 256-bit integer vector in \a __a @@ -1858,9 +1852,8 @@ _mm256_sad_epu8(__m256i __a, __m256i __b) /// control byte specify the index (within the same 128-bit half) of \a __a /// to copy to the result byte. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shuffle_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 473fe94..23b2d29 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -866,23 +866,20 @@ _mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_shuffle_epi8(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_shuffle_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h index 8899298..fb6dcb6 100644 --- a/clang/lib/Headers/avx512cdintrin.h +++ b/clang/lib/Headers/avx512cdintrin.h @@ -15,94 +15,82 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512cd"), __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \ __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr -#else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi64 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A); +_mm512_conflict_epi64(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_conflict_epi64(__A), - (__v8di)__W); +_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) -{ +_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), - (__v8di)_mm512_setzero_si512 ()); + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi32 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A); +_mm512_conflict_epi32(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)__W); +_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)_mm512_setzero_si512()); +_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), + (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi32(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_clzg((__v16si)__A, (__v16si)_mm512_set1_epi32(32)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_lzcnt_epi32(__A), - (__v16si)__W); + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi64(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_clzg( (__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_lzcnt_epi64(__A), - (__v8di)__W); + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), @@ -110,19 +98,15 @@ _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcastmb_epi64 (__mmask8 __A) -{ - return (__m512i) _mm512_set1_epi64((long long) __A); +_mm512_broadcastmb_epi64(__mmask8 __A) { + return (__m512i)_mm512_set1_epi64((long long)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcastmw_epi32 (__mmask16 __A) -{ - return (__m512i) _mm512_set1_epi32((int) __A); - +_mm512_broadcastmw_epi32(__mmask16 __A) { + return (__m512i)_mm512_set1_epi32((int)__A); } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index fb65bf9..3681cca 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1083,17 +1083,15 @@ _mm512_broadcast_f32x2(__m128 __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)_mm512_setzero_ps()); @@ -1106,17 +1104,15 @@ _mm512_broadcast_f32x8(__m256 __A) { 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)_mm512_setzero_ps()); @@ -1128,17 +1124,15 @@ _mm512_broadcast_f64x2(__m128d __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)__O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)_mm512_setzero_pd()); @@ -1151,17 +1145,15 @@ _mm512_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)_mm512_setzero_si512()); @@ -1174,17 +1166,15 @@ _mm512_broadcast_i32x8(__m256i __A) { 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)_mm512_setzero_si512()); @@ -1196,17 +1186,15 @@ _mm512_broadcast_i64x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 80e5842..07de036 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -225,17 +225,15 @@ _mm512_broadcastd_epi32(__m128i __A) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) _mm512_setzero_si512()); @@ -247,18 +245,14 @@ _mm512_broadcastq_epi64(__m128i __A) { 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di) _mm512_broadcastq_epi64(__A), - (__v8di) __O); - +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di) _mm512_broadcastq_epi64(__A), (__v8di) _mm512_setzero_si512()); @@ -321,9 +315,8 @@ _mm512_set1_epi32(int __s) __s, __s, __s, __s, __s, __s, __s, __s }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi32(__mmask16 __M, int __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si)_mm512_set1_epi32(__A), (__v16si)_mm512_setzero_si512()); @@ -335,9 +328,8 @@ _mm512_set1_epi64(long long __d) return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_set1_epi64(__A), (__v8di)_mm512_setzero_si512()); @@ -6552,17 +6544,15 @@ _mm512_broadcast_f32x4(__m128 __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)_mm512_setzero_ps()); @@ -6597,17 +6587,15 @@ _mm512_broadcast_i32x4(__m128i __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)_mm512_setzero_si512()); @@ -6635,33 +6623,29 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) __O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) _mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) __O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) _mm512_setzero_ps()); @@ -8381,17 +8365,15 @@ _mm512_movehdup_ps (__m512 __A) 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)_mm512_setzero_ps()); @@ -8404,44 +8386,38 @@ _mm512_moveldup_ps (__m512 __A) 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } @@ -8884,17 +8860,15 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B) } #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) { return (__m512i) __builtin_ia32_selectd_512(__M, (__v16si) _mm512_set1_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) { return (__m512i) __builtin_ia32_selectq_512(__M, (__v8di) _mm512_set1_epi64(__A), (__v8di) __O); diff --git a/clang/lib/Headers/avx512ifmaintrin.h b/clang/lib/Headers/avx512ifmaintrin.h index f01b322..625a8ff 100644 --- a/clang/lib/Headers/avx512ifmaintrin.h +++ b/clang/lib/Headers/avx512ifmaintrin.h @@ -15,54 +15,53 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ + __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), \ __min_vector_width__(512))) +#endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52huq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52hi_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52hi_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52hi_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di) __X, (__v8di) __Y, - (__v8di) __Z); +_mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_vpmadd52luq512((__v8di)__X, (__v8di)__Y, + (__v8di)__Z); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), - (__v8di)__W); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_madd52lo_epu64( + __m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__W, __X, __Y), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), - (__v8di)_mm512_setzero_si512()); +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_madd52lo_epu64( + __mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_madd52lo_epu64(__X, __Y, __Z), + (__v8di)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512ifmavlintrin.h b/clang/lib/Headers/avx512ifmavlintrin.h index a72b561..c4449c7 100644 --- a/clang/lib/Headers/avx512ifmavlintrin.h +++ b/clang/lib/Headers/avx512ifmavlintrin.h @@ -8,13 +8,24 @@ *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H -#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." +#error \ + "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __IFMAVLINTRIN_H #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512ifma,avx512vl"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512ifma,avx512vl"), \ @@ -24,6 +35,8 @@ __target__("avx512ifma,avx512vl"), \ __min_vector_width__(256))) +#endif + #define _mm_madd52hi_epu64(X, Y, Z) \ ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ (__v2di)(Z))) @@ -41,70 +54,57 @@ (__v4di)(Z))) static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), - (__v2di)__W); +_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) -{ - return (__m128i)__builtin_ia32_selectq_128(__M, - (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), - (__v2di)_mm_setzero_si128()); +_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { + return (__m128i)__builtin_ia32_selectq_128( + __M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), + (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), - (__v4di)__W); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64( + __m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) -{ - return (__m256i)__builtin_ia32_selectq_256(__M, - (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), - (__v4di)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64( + __mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), + (__v4di)_mm256_setzero_si256()); } - #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 81e4cbb9..639fb60 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -1067,33 +1067,29 @@ _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h index 30c9f90..7719680f 100644 --- a/clang/lib/Headers/avx512vlcdintrin.h +++ b/clang/lib/Headers/avx512vlcdintrin.h @@ -14,208 +14,182 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#else -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastmb_epi64 (__mmask8 __A) -{ +_mm_broadcastmb_epi64(__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastmb_epi64 (__mmask8 __A) -{ - return (__m256i) _mm256_set1_epi64x((long long)__A); +_mm256_broadcastmb_epi64(__mmask8 __A) { + return (__m256i)_mm256_set1_epi64x((long long)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastmw_epi32 (__mmask16 __A) -{ +_mm_broadcastmw_epi32(__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastmw_epi32 (__mmask16 __A) -{ +_mm256_broadcastmw_epi32(__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi64 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); +_mm_conflict_epi64(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_conflict_epi64(__A), - (__v2di)__W); +_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi64 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); +_mm256_conflict_epi64(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_conflict_epi64(__A), - (__v4di)__W); +_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi32 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); +_mm_conflict_epi32(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_conflict_epi32(__A), - (__v4si)__W); +_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi32 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); +_mm256_conflict_epi32(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_conflict_epi32(__A), - (__v8si)__W); +_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_clzg((__v4si)__A, (__v4si)_mm_set1_epi32(32)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)__W); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32(__m256i __A) { return (__m256i)__builtin_elementwise_clzg((__v8si)__A, (__v8si)_mm256_set1_epi32(32)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_lzcnt_epi32(__A), - (__v8si)__W); + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_clzg( (__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)__W); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_clzg( (__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_lzcnt_epi64(__A), - (__v4di)__W); + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), @@ -224,7 +198,5 @@ _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __AVX512VLCDINTRIN_H */ diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 68bd52e..ee7974e 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -968,17 +968,15 @@ _mm256_broadcast_f32x2(__m128 __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)_mm256_setzero_ps()); @@ -990,17 +988,15 @@ _mm256_broadcast_f64x2(__m128d __A) { 0, 1, 0, 1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); @@ -1012,17 +1008,15 @@ _mm_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)_mm_setzero_si128()); @@ -1034,17 +1028,15 @@ _mm256_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)_mm256_setzero_si256()); @@ -1056,17 +1048,15 @@ _mm256_broadcast_i64x2(__m128i __A) { 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 965741f..676b5a0 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -5101,69 +5101,55 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) -{ - return (__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)__O); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)_mm_setzero_si128()); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)__O); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256()); } - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, (__v4di) _mm256_set1_epi64x(__A), (__v4di) __O) ; } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ - return (__m256i) __builtin_ia32_selectq_256(__M, - (__v4di) _mm256_set1_epi64x(__A), - (__v4di) _mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256()); } #define _mm_fixupimm_pd(A, B, C, imm) \ @@ -5610,130 +5596,113 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) (__mmask8) __U); } - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); @@ -6055,129 +6024,117 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); @@ -6594,17 +6551,15 @@ _mm256_broadcast_f32x4(__m128 __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)_mm256_setzero_ps()); @@ -6616,129 +6571,113 @@ _mm256_broadcast_i32x4(__m128i __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) __O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) _mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) __O); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) _mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) __O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) _mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) _mm256_setzero_si256()); @@ -8003,65 +7942,57 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)_mm256_setzero_ps()); diff --git a/clang/lib/Headers/avxifmaintrin.h b/clang/lib/Headers/avxifmaintrin.h index 5c782d2a..a2ef601 100644 --- a/clang/lib/Headers/avxifmaintrin.h +++ b/clang/lib/Headers/avxifmaintrin.h @@ -15,12 +15,21 @@ #define __AVXIFMAINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avxifma"), __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, __target__("avxifma"), \ __min_vector_width__(256))) +#endif // must vex-encoding diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index d6ba19a..696ec31 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -694,9 +694,8 @@ _mm256_xor_ps(__m256 __a, __m256 __b) /// elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal sums of /// both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hadd_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hadd_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_haddpd256((__v4df)__a, (__v4df)__b); } @@ -717,9 +716,8 @@ _mm256_hadd_pd(__m256d __a, __m256d __b) /// index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal sums of /// both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hadd_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hadd_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_haddps256((__v8sf)__a, (__v8sf)__b); } @@ -740,9 +738,8 @@ _mm256_hadd_ps(__m256 __a, __m256 __b) /// odd-indexed elements of a vector of [4 x double]. /// \returns A 256-bit vector of [4 x double] containing the horizontal /// differences of both operands. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_hsub_pd(__m256d __a, __m256d __b) -{ +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_hsub_pd(__m256d __a, __m256d __b) { return (__m256d)__builtin_ia32_hsubpd256((__v4df)__a, (__v4df)__b); } @@ -763,9 +760,8 @@ _mm256_hsub_pd(__m256d __a, __m256d __b) /// elements with index 2, 3, 6, 7 of a vector of [8 x float]. /// \returns A 256-bit vector of [8 x float] containing the horizontal /// differences of both operands. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_hsub_ps(__m256 __a, __m256 __b) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a, + __m256 __b) { return (__m256)__builtin_ia32_hsubps256((__v8sf)__a, (__v8sf)__b); } @@ -2539,9 +2535,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) { /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testz_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); } @@ -2568,9 +2563,8 @@ _mm_testz_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the CF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testc_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); } @@ -2598,9 +2592,8 @@ _mm_testc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testnzc_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); } @@ -2627,9 +2620,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testz_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); } @@ -2656,9 +2648,8 @@ _mm_testz_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testc_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); } @@ -2686,9 +2677,8 @@ _mm_testc_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testnzc_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); } @@ -2715,9 +2705,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a, + __m256d __b) { return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); } @@ -2744,9 +2733,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a, + __m256d __b) { return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); } @@ -2774,9 +2762,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testnzc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); } @@ -2803,9 +2790,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); } @@ -2832,9 +2818,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); } @@ -2862,9 +2847,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); } @@ -2888,9 +2872,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit integer vector. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testz_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); } @@ -2914,9 +2897,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); } @@ -2941,9 +2923,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testnzc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); } diff --git a/clang/lib/Headers/float.h b/clang/lib/Headers/float.h index 84551af..30427c2 100644 --- a/clang/lib/Headers/float.h +++ b/clang/lib/Headers/float.h @@ -89,6 +89,9 @@ !defined(__STRICT_ANSI__) # undef INFINITY # undef NAN +# undef FLT_SNAN +# undef DBL_SNAN +# undef LDBL_SNAN #endif /* Characteristics of floating point types, C99 5.2.4.2.2 */ @@ -160,9 +163,15 @@ #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \ !defined(__STRICT_ANSI__) + /* C23 5.2.5.3.2p28 */ +# define FLT_SNAN (__builtin_nansf("")) +# define DBL_SNAN (__builtin_nans("")) +# define LDBL_SNAN (__builtin_nansl("")) + /* C23 5.2.5.3.3p29-30 */ # define INFINITY (__builtin_inff()) # define NAN (__builtin_nanf("")) + /* C23 5.2.5.3.3p32 */ # define FLT_NORM_MAX __FLT_NORM_MAX__ # define DBL_NORM_MAX __DBL_NORM_MAX__ diff --git a/clang/lib/Headers/pmmintrin.h b/clang/lib/Headers/pmmintrin.h index f0c9b2b..42bd343 100644 --- a/clang/lib/Headers/pmmintrin.h +++ b/clang/lib/Headers/pmmintrin.h @@ -83,9 +83,8 @@ _mm_addsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of /// both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hadd_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); } @@ -106,9 +105,8 @@ _mm_hadd_ps(__m128 __a, __m128 __b) /// bits of the destination. /// \returns A 128-bit vector of [4 x float] containing the horizontal /// differences of both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_hsub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); } @@ -168,9 +166,8 @@ _mm_moveldup_ps(__m128 __a) /// A 128-bit vector of [2 x double] containing the right source operand. /// \returns A 128-bit vector of [2 x double] containing the alternating sums /// and differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_addsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_addsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); } @@ -191,9 +188,8 @@ _mm_addsub_pd(__m128d __a, __m128d __b) /// destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of /// both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hadd_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_pd(__m128d __a, __m128d __b) { return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); } @@ -214,9 +210,8 @@ _mm_hadd_pd(__m128d __a, __m128d __b) /// the destination. /// \returns A 128-bit vector of [2 x double] containing the horizontal /// differences of both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_hsub_pd(__m128d __a, __m128d __b) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_pd(__m128d __a, __m128d __b) { return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); } diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h index f902ca1..ad28f06 100644 --- a/clang/lib/Headers/ptrauth.h +++ b/clang/lib/Headers/ptrauth.h @@ -241,6 +241,18 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; #define ptrauth_type_discriminator(__type) \ __builtin_ptrauth_type_discriminator(__type) +/* Compute the constant discriminator used by Clang to sign pointers with the + given C function pointer type. + + A call to this function is an integer constant expression. */ +#if __has_feature(ptrauth_function_pointer_type_discrimination) +#define ptrauth_function_pointer_type_discriminator(__type) \ + __builtin_ptrauth_type_discriminator(__type) +#else +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) +#endif + /* Compute a signature for the given pair of pointer-sized values. The order of the arguments is significant. @@ -372,6 +384,8 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; }) #define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0) +#define ptrauth_function_pointer_type_discriminator(__type) \ + ((ptrauth_extra_data_t)0) #define ptrauth_sign_generic_data(__value, __data) \ ({ \ diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 5e63a1a..4f197d5 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1093,8 +1093,8 @@ _mm_max_epu32(__m128i __V1, __m128i __V2) { /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all zeros; FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testz_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); } @@ -1110,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all ones; FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); } @@ -1128,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are neither all zeros nor all ones; /// FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testnzc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); } diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 3fc9f98..ee96caa 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -202,10 +202,9 @@ _mm_abs_epi32(__m128i __a) { /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -225,10 +224,9 @@ _mm_hadd_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of /// both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadd_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadd_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -248,11 +246,10 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -272,11 +269,10 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadd_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadd_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -299,10 +295,9 @@ _mm_hadd_pi32(__m64 __a, __m64 __b) /// destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hadds_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hadds_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -325,11 +320,10 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hadds_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phaddsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hadds_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phaddsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -349,10 +343,9 @@ _mm_hadds_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -372,10 +365,9 @@ _mm_hsub_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsub_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsub_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -395,11 +387,10 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -419,11 +410,10 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsub_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubd128( - (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsub_pi32(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -446,10 +436,9 @@ _mm_hsub_pi32(__m64 __a, __m64 __b) /// the destination. /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_hsubs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_hsubs_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -472,11 +461,10 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_hsubs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_phsubsw128( - (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_hsubs_pi16(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_phsubsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -556,10 +544,9 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) { /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mulhrs_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, + __m128i __b) { + return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -603,10 +590,9 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b) /// Bits [6:4] Reserved. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 128-bit integer vector containing the copied or cleared values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_shuffle_epi8(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_shuffle_epi8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); } /// Copies the 8-bit integers from a 64-bit integer vector to the @@ -628,13 +614,12 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b) /// destination. \n /// Bits [2:0] select the source byte to be copied. /// \returns A 64-bit integer vector containing the copied or cleared values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_shuffle_pi8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pshufb128( - (__v16qi)__builtin_shufflevector( - (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_shuffle_pi8(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pshufb128( + (__v16qi)__builtin_shufflevector((__v2si)(__a), __extension__(__v2si){}, + 0, 1, 0, 1), + (__v16qi)__zext128(__b))); } /// For each 8-bit integer in the first source operand, perform one of diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index d876b47..605409c 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2416,9 +2416,8 @@ _mm_min_pu8(__m64 __a, __m64 __b) { /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. -static __inline__ int __DEFAULT_FN_ATTRS_SSE2 -_mm_movemask_pi8(__m64 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a)); } |