diff options
Diffstat (limited to 'clang/lib/Headers')
-rw-r--r-- | clang/lib/Headers/__clang_hip_runtime_wrapper.h | 2 | ||||
-rw-r--r-- | clang/lib/Headers/avx2intrin.h | 13 | ||||
-rw-r--r-- | clang/lib/Headers/avx512bwintrin.h | 30 | ||||
-rw-r--r-- | clang/lib/Headers/avx512cdintrin.h | 86 | ||||
-rw-r--r-- | clang/lib/Headers/avx512dqintrin.h | 60 | ||||
-rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 126 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlbwintrin.h | 28 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlcdintrin.h | 154 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vldqintrin.h | 50 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 361 | ||||
-rw-r--r-- | clang/lib/Headers/tmmintrin.h | 33 |
11 files changed, 384 insertions, 559 deletions
diff --git a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index da1e39a..fb0ece9 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -25,6 +25,8 @@ #define __constant__ __attribute__((constant)) #define __managed__ __attribute__((managed)) +#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__))) + #if !defined(__cplusplus) || __cplusplus < 201103L #define nullptr NULL; #endif diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index e150aa6..d35bc0e 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1650,9 +1650,8 @@ _mm256_mul_epi32(__m256i __a, __m256i __b) { /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the rounded products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mulhrs_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); } @@ -1670,8 +1669,7 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_mulhi_epu16(__m256i __a, __m256i __b) -{ +_mm256_mulhi_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b); } @@ -1852,9 +1850,8 @@ _mm256_sad_epu8(__m256i __a, __m256i __b) /// control byte specify the index (within the same 128-bit half) of \a __a /// to copy to the result byte. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_shuffle_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); } diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 473fe94..ac75b6c 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -866,23 +866,20 @@ _mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_shuffle_epi8(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_shuffle_epi8(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), (__v64qi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, (__v64qi)_mm512_shuffle_epi8(__A, __B), (__v64qi)_mm512_setzero_si512()); @@ -1006,23 +1003,20 @@ _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, (__v32hi)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mulhrs_epi16(__m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mulhrs_epi16(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, (__v32hi)_mm512_mulhrs_epi16(__A, __B), (__v32hi)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h index 8899298..fb6dcb6 100644 --- a/clang/lib/Headers/avx512cdintrin.h +++ b/clang/lib/Headers/avx512cdintrin.h @@ -15,94 +15,82 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512cd"), __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \ __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr -#else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi64 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A); +_mm512_conflict_epi64(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_conflict_epi64(__A), - (__v8di)__W); +_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) -{ +_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), - (__v8di)_mm512_setzero_si512 ()); + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi32 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A); +_mm512_conflict_epi32(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)__W); +_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)_mm512_setzero_si512()); +_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), + (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi32(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_clzg((__v16si)__A, (__v16si)_mm512_set1_epi32(32)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_lzcnt_epi32(__A), - (__v16si)__W); + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi64(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_clzg( (__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_lzcnt_epi64(__A), - (__v8di)__W); + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), @@ -110,19 +98,15 @@ _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcastmb_epi64 (__mmask8 __A) -{ - return (__m512i) _mm512_set1_epi64((long long) __A); +_mm512_broadcastmb_epi64(__mmask8 __A) { + return (__m512i)_mm512_set1_epi64((long long)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_broadcastmw_epi32 (__mmask16 __A) -{ - return (__m512i) _mm512_set1_epi32((int) __A); - +_mm512_broadcastmw_epi32(__mmask16 __A) { + return (__m512i)_mm512_set1_epi32((int)__A); } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index fb65bf9..3681cca 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1083,17 +1083,15 @@ _mm512_broadcast_f32x2(__m128 __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)_mm512_setzero_ps()); @@ -1106,17 +1104,15 @@ _mm512_broadcast_f32x8(__m256 __A) { 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)_mm512_setzero_ps()); @@ -1128,17 +1124,15 @@ _mm512_broadcast_f64x2(__m128d __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)__O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)_mm512_setzero_pd()); @@ -1151,17 +1145,15 @@ _mm512_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)_mm512_setzero_si512()); @@ -1174,17 +1166,15 @@ _mm512_broadcast_i32x8(__m256i __A) { 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)_mm512_setzero_si512()); @@ -1196,17 +1186,15 @@ _mm512_broadcast_i64x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 80e5842..07de036 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -225,17 +225,15 @@ _mm512_broadcastd_epi32(__m128i __A) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) _mm512_setzero_si512()); @@ -247,18 +245,14 @@ _mm512_broadcastq_epi64(__m128i __A) { 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di) _mm512_broadcastq_epi64(__A), - (__v8di) __O); - +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di) _mm512_broadcastq_epi64(__A), (__v8di) _mm512_setzero_si512()); @@ -321,9 +315,8 @@ _mm512_set1_epi32(int __s) __s, __s, __s, __s, __s, __s, __s, __s }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi32(__mmask16 __M, int __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si)_mm512_set1_epi32(__A), (__v16si)_mm512_setzero_si512()); @@ -335,9 +328,8 @@ _mm512_set1_epi64(long long __d) return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_set1_epi64(__A), (__v8di)_mm512_setzero_si512()); @@ -6552,17 +6544,15 @@ _mm512_broadcast_f32x4(__m128 __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)_mm512_setzero_ps()); @@ -6597,17 +6587,15 @@ _mm512_broadcast_i32x4(__m128i __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)_mm512_setzero_si512()); @@ -6635,33 +6623,29 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) __O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) _mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) __O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) _mm512_setzero_ps()); @@ -8381,17 +8365,15 @@ _mm512_movehdup_ps (__m512 __A) 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)_mm512_setzero_ps()); @@ -8404,44 +8386,38 @@ _mm512_moveldup_ps (__m512 __A) 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } @@ -8884,17 +8860,15 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B) } #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) { return (__m512i) __builtin_ia32_selectd_512(__M, (__v16si) _mm512_set1_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) { return (__m512i) __builtin_ia32_selectq_512(__M, (__v8di) _mm512_set1_epi64(__A), (__v8di) __O); diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h index 81e4cbb9..0fcfe37 100644 --- a/clang/lib/Headers/avx512vlbwintrin.h +++ b/clang/lib/Headers/avx512vlbwintrin.h @@ -1067,33 +1067,29 @@ _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_shuffle_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_shuffle_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); @@ -1514,28 +1510,28 @@ _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h index 30c9f90..7719680f 100644 --- a/clang/lib/Headers/avx512vlcdintrin.h +++ b/clang/lib/Headers/avx512vlcdintrin.h @@ -14,208 +14,182 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#else -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastmb_epi64 (__mmask8 __A) -{ +_mm_broadcastmb_epi64(__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastmb_epi64 (__mmask8 __A) -{ - return (__m256i) _mm256_set1_epi64x((long long)__A); +_mm256_broadcastmb_epi64(__mmask8 __A) { + return (__m256i)_mm256_set1_epi64x((long long)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastmw_epi32 (__mmask16 __A) -{ +_mm_broadcastmw_epi32(__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastmw_epi32 (__mmask16 __A) -{ +_mm256_broadcastmw_epi32(__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi64 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); +_mm_conflict_epi64(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_conflict_epi64(__A), - (__v2di)__W); +_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi64 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); +_mm256_conflict_epi64(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_conflict_epi64(__A), - (__v4di)__W); +_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi32 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); +_mm_conflict_epi32(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_conflict_epi32(__A), - (__v4si)__W); +_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi32 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); +_mm256_conflict_epi32(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_conflict_epi32(__A), - (__v8si)__W); +_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_clzg((__v4si)__A, (__v4si)_mm_set1_epi32(32)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)__W); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32(__m256i __A) { return (__m256i)__builtin_elementwise_clzg((__v8si)__A, (__v8si)_mm256_set1_epi32(32)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_lzcnt_epi32(__A), - (__v8si)__W); + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_clzg( (__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)__W); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_clzg( (__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_lzcnt_epi64(__A), - (__v4di)__W); + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), @@ -224,7 +198,5 @@ _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __AVX512VLCDINTRIN_H */ diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 68bd52e..ee7974e 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -968,17 +968,15 @@ _mm256_broadcast_f32x2(__m128 __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)_mm256_setzero_ps()); @@ -990,17 +988,15 @@ _mm256_broadcast_f64x2(__m128d __A) { 0, 1, 0, 1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); @@ -1012,17 +1008,15 @@ _mm_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)_mm_setzero_si128()); @@ -1034,17 +1028,15 @@ _mm256_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)_mm256_setzero_si256()); @@ -1056,17 +1048,15 @@ _mm256_broadcast_i64x2(__m128i __A) { 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 965741f..676b5a0 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -5101,69 +5101,55 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) -{ - return (__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)__O); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)_mm_setzero_si128()); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)__O); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256()); } - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, (__v4di) _mm256_set1_epi64x(__A), (__v4di) __O) ; } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ - return (__m256i) __builtin_ia32_selectq_256(__M, - (__v4di) _mm256_set1_epi64x(__A), - (__v4di) _mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256()); } #define _mm_fixupimm_pd(A, B, C, imm) \ @@ -5610,130 +5596,113 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) (__mmask8) __U); } - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); @@ -6055,129 +6024,117 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); @@ -6594,17 +6551,15 @@ _mm256_broadcast_f32x4(__m128 __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)_mm256_setzero_ps()); @@ -6616,129 +6571,113 @@ _mm256_broadcast_i32x4(__m128i __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) __O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) _mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) __O); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) _mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) __O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) _mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) _mm256_setzero_si256()); @@ -8003,65 +7942,57 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)_mm256_setzero_ps()); diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 9d007c8..5d0f20f 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -544,8 +544,8 @@ _mm_maddubs_pi16(__m64 __a, __m64 __b) { /// A 128-bit vector of [8 x i16] containing one of the source operands. /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhrs_epi16(__m128i __a, __m128i __b) { return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); } @@ -563,11 +563,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_mulhrs_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mulhrs_pi16(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__zext128(__a), + (__v8hi)__zext128(__b))); } /// Copies the 8-bit integers from a 128-bit integer vector to the @@ -590,10 +589,9 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b) /// Bits [6:4] Reserved. \n /// Bits [3:0] select the source byte to be copied. /// \returns A 128-bit integer vector containing the copied or cleared values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_shuffle_epi8(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_shuffle_epi8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); } /// Copies the 8-bit integers from a 64-bit integer vector to the @@ -615,13 +613,12 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b) /// destination. \n /// Bits [2:0] select the source byte to be copied. /// \returns A 64-bit integer vector containing the copied or cleared values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_shuffle_pi8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_pshufb128( - (__v16qi)__builtin_shufflevector( - (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_shuffle_pi8(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pshufb128( + (__v16qi)__builtin_shufflevector((__v2si)(__a), __extension__(__v2si){}, + 0, 1, 0, 1), + (__v16qi)__zext128(__b))); } /// For each 8-bit integer in the first source operand, perform one of |