diff options
Diffstat (limited to 'clang/lib/Headers/avx2intrin.h')
-rw-r--r-- | clang/lib/Headers/avx2intrin.h | 320 |
1 files changed, 133 insertions, 187 deletions
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 3c3a3d1..384faa3 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -112,10 +112,9 @@ /// \param __a /// A 256-bit integer vector. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_abs_epi8(__m256i __a) -{ - return (__m256i)__builtin_elementwise_abs((__v32qs)__a); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_epi8(__m256i __a) { + return (__m256i)__builtin_elementwise_abs((__v32qs)__a); } /// Computes the absolute value of each signed 16-bit element in the 256-bit @@ -129,10 +128,9 @@ _mm256_abs_epi8(__m256i __a) /// \param __a /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_abs_epi16(__m256i __a) -{ - return (__m256i)__builtin_elementwise_abs((__v16hi)__a); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_epi16(__m256i __a) { + return (__m256i)__builtin_elementwise_abs((__v16hi)__a); } /// Computes the absolute value of each signed 32-bit element in the 256-bit @@ -146,10 +144,9 @@ _mm256_abs_epi16(__m256i __a) /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_abs_epi32(__m256i __a) -{ - return (__m256i)__builtin_elementwise_abs((__v8si)__a); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_abs_epi32(__m256i __a) { + return (__m256i)__builtin_elementwise_abs((__v8si)__a); } /// Converts the elements of two 256-bit vectors of [16 x i16] to 8-bit @@ -367,9 +364,8 @@ _mm256_add_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qs)__a, (__v32qs)__b); } @@ -385,9 +381,8 @@ _mm256_adds_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hi)__a, (__v16hi)__b); } @@ -404,9 +399,8 @@ _mm256_adds_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing one of the source operands. /// \returns A 256-bit integer vector containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epu8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v32qu)__a, (__v32qu)__b); } @@ -422,9 +416,8 @@ _mm256_adds_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the sums. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_adds_epu16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_adds_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_add_sat((__v16hu)__a, (__v16hu)__b); } @@ -641,7 +634,7 @@ _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a == (__v32qi)__b); @@ -667,7 +660,7 @@ _mm256_cmpeq_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a == (__v16hi)__b); @@ -693,7 +686,7 @@ _mm256_cmpeq_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a == (__v8si)__b); @@ -719,7 +712,7 @@ _mm256_cmpeq_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a == (__v4di)__b); @@ -745,7 +738,7 @@ _mm256_cmpeq_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing one of the inputs. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi8(__m256i __a, __m256i __b) { /* This function always performs a signed comparison, but __v32qi is a char @@ -773,7 +766,7 @@ _mm256_cmpgt_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the inputs. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a > (__v16hi)__b); @@ -799,7 +792,7 @@ _mm256_cmpgt_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the inputs. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a > (__v8si)__b); @@ -825,7 +818,7 @@ _mm256_cmpgt_epi32(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [4 x i64] containing one of the inputs. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a > (__v4di)__b); @@ -1371,9 +1364,8 @@ _mm256_movemask_epi8(__m256i __a) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi16(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); @@ -1399,9 +1391,8 @@ _mm256_cvtepi8_epi16(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); @@ -1426,9 +1417,8 @@ _mm256_cvtepi8_epi32(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi8_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char which may be signed or unsigned, so use __v16qs. */ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di); @@ -1454,9 +1444,8 @@ _mm256_cvtepi8_epi64(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi16_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si); } @@ -1479,9 +1468,8 @@ _mm256_cvtepi16_epi32(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi16_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di); } @@ -1504,9 +1492,8 @@ _mm256_cvtepi16_epi64(__m128i __V) /// A 128-bit vector of [4 x i32] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the sign-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4si)__V, __v4di); } @@ -1530,9 +1517,8 @@ _mm256_cvtepi32_epi64(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [16 x i16] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi16(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi16(__m128i __V) { return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } @@ -1556,9 +1542,8 @@ _mm256_cvtepu8_epi16(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } @@ -1581,9 +1566,8 @@ _mm256_cvtepu8_epi32(__m128i __V) /// A 128-bit integer vector containing the source bytes. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu8_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } @@ -1607,9 +1591,8 @@ _mm256_cvtepu8_epi64(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [8 x i32] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu16_epi32(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } @@ -1632,9 +1615,8 @@ _mm256_cvtepu16_epi32(__m128i __V) /// A 128-bit vector of [8 x i16] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu16_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di); } @@ -1657,9 +1639,8 @@ _mm256_cvtepu16_epi64(__m128i __V) /// A 128-bit vector of [4 x i32] containing the source values. /// \returns A 256-bit vector of [4 x i64] containing the zero-extended /// values. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtepu32_epi64(__m128i __V) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepu32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4su)__V, __v4di); } @@ -1683,9 +1664,8 @@ _mm256_cvtepu32_epi64(__m128i __V) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mul_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mul_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); } @@ -1729,10 +1709,10 @@ _mm256_mulhrs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epu16(__m256i __a, __m256i __b) { - return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); + return (__m256i)__builtin_ia32_pmulhuw256((__v16hu)__a, (__v16hu)__b); } /// Multiplies signed 16-bit integer elements of two 256-bit vectors of @@ -1748,7 +1728,7 @@ _mm256_mulhi_epu16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mulhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); @@ -1767,7 +1747,7 @@ _mm256_mulhi_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing one of the source operands. /// \returns A 256-bit vector of [16 x i16] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a * (__v16hu)__b); @@ -1812,9 +1792,8 @@ _mm256_mullo_epi32 (__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32] containing one of the source operands. /// \returns A 256-bit vector of [4 x i64] containing the products. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mul_epu32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mul_epu32(__m256i __a, __m256i __b) { return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); } @@ -2142,9 +2121,8 @@ _mm256_sign_epi32(__m256i __a, __m256i __b) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } @@ -2182,9 +2160,8 @@ _mm256_sll_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } @@ -2222,9 +2199,8 @@ _mm256_sll_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_slli_epi64(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } @@ -2263,9 +2239,8 @@ _mm256_sll_epi64(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } @@ -2305,9 +2280,8 @@ _mm256_sra_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srai_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } @@ -2386,9 +2360,8 @@ _mm256_sra_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi16(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } @@ -2426,9 +2399,8 @@ _mm256_srl_epi16(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi32(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } @@ -2466,9 +2438,8 @@ _mm256_srl_epi32(__m256i __a, __m128i __count) /// \param __count /// An unsigned integer value specifying the shift count (in bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_srli_epi64(__m256i __a, int __count) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } @@ -2619,9 +2590,8 @@ _mm256_sub_epi64(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qs)__a, (__v32qs)__b); } @@ -2645,9 +2615,8 @@ _mm256_subs_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hi)__a, (__v16hi)__b); } @@ -2672,9 +2641,8 @@ _mm256_subs_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector containing the subtrahends. /// \returns A 256-bit integer vector containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epu8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v32qu)__a, (__v32qu)__b); } @@ -2698,9 +2666,8 @@ _mm256_subs_epu8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16] containing the subtrahends. /// \returns A 256-bit vector of [16 x i16] containing the differences. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_subs_epu16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_subs_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_elementwise_sub_sat((__v16hu)__a, (__v16hu)__b); } @@ -2732,9 +2699,8 @@ _mm256_subs_epu16(__m256i __a, __m256i __b) /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } @@ -2767,9 +2733,8 @@ _mm256_unpackhi_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } @@ -2801,9 +2766,8 @@ _mm256_unpackhi_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } @@ -2831,9 +2795,8 @@ _mm256_unpackhi_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpackhi_epi64(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } @@ -2865,9 +2828,8 @@ _mm256_unpackhi_epi64(__m256i __a, __m256i __b) /// A 256-bit integer vector used as the source for the odd-numbered bytes /// of the result. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi8(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } @@ -2900,9 +2862,8 @@ _mm256_unpacklo_epi8(__m256i __a, __m256i __b) /// A 256-bit vector of [16 x i16] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi16(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } @@ -2934,9 +2895,8 @@ _mm256_unpacklo_epi16(__m256i __a, __m256i __b) /// A 256-bit vector of [8 x i32] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi32(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } @@ -2964,9 +2924,8 @@ _mm256_unpacklo_epi32(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x i64] used as the source for the odd-numbered /// elements of the result. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_unpacklo_epi64(__m256i __a, __m256i __b) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } @@ -3017,9 +2976,8 @@ _mm256_stream_load_si256(const void *__V) /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x float] containing the result. -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_broadcastss_ps(__m128 __X) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0); } @@ -3034,9 +2992,8 @@ _mm_broadcastss_ps(__m128 __X) /// \param __a /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x double] containing the result. -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_broadcastsd_pd(__m128d __a) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } @@ -3051,9 +3008,8 @@ _mm_broadcastsd_pd(__m128d __a) /// \param __X /// A 128-bit vector of [4 x float] whose low element will be broadcast. /// \returns A 256-bit vector of [8 x float] containing the result. -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_broadcastss_ps(__m128 __X) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3068,9 +3024,8 @@ _mm256_broadcastss_ps(__m128 __X) /// \param __X /// A 128-bit vector of [2 x double] whose low element will be broadcast. /// \returns A 256-bit vector of [4 x double] containing the result. -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_broadcastsd_pd(__m128d __X) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0); } @@ -3084,9 +3039,8 @@ _mm256_broadcastsd_pd(__m128d __X) /// \param __X /// A 128-bit integer vector to be broadcast. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastsi128_si256(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); } @@ -3176,9 +3130,8 @@ _mm256_broadcastsi128_si256(__m128i __X) /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastb_epi8(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3192,9 +3145,8 @@ _mm256_broadcastb_epi8(__m128i __X) /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastw_epi16(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3208,9 +3160,8 @@ _mm256_broadcastw_epi16(__m128i __X) /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastd_epi32(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3224,9 +3175,8 @@ _mm256_broadcastd_epi32(__m128i __X) /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_broadcastq_epi64(__m128i __X) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } @@ -3240,9 +3190,8 @@ _mm256_broadcastq_epi64(__m128i __X) /// \param __X /// A 128-bit integer vector whose low byte will be broadcast. /// \returns A 128-bit integer vector containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastb_epi8(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3256,9 +3205,8 @@ _mm_broadcastb_epi8(__m128i __X) /// \param __X /// A 128-bit vector of [8 x i16] whose low element will be broadcast. /// \returns A 128-bit vector of [8 x i16] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastw_epi16(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0); } @@ -3272,9 +3220,8 @@ _mm_broadcastw_epi16(__m128i __X) /// \param __X /// A 128-bit vector of [4 x i32] whose low element will be broadcast. /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastd_epi32(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0); } @@ -3288,9 +3235,8 @@ _mm_broadcastd_epi32(__m128i __X) /// \param __X /// A 128-bit vector of [2 x i64] whose low element will be broadcast. /// \returns A 128-bit vector of [2 x i64] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_broadcastq_epi64(__m128i __X) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } @@ -3764,7 +3710,7 @@ _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); @@ -3786,7 +3732,7 @@ _mm256_sllv_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); @@ -3808,7 +3754,7 @@ _mm_sllv_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); @@ -3830,7 +3776,7 @@ _mm256_sllv_epi64(__m256i __X, __m256i __Y) /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); @@ -3853,7 +3799,7 @@ _mm_sllv_epi64(__m128i __X, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); @@ -3876,7 +3822,7 @@ _mm256_srav_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); @@ -3898,7 +3844,7 @@ _mm_srav_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [8 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); @@ -3920,7 +3866,7 @@ _mm256_srlv_epi32(__m256i __X, __m256i __Y) /// A 128-bit vector of [4 x i32] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [4 x i32] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); @@ -3942,7 +3888,7 @@ _mm_srlv_epi32(__m128i __X, __m128i __Y) /// A 256-bit vector of [4 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 256-bit vector of [4 x i64] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); @@ -3964,7 +3910,7 @@ _mm256_srlv_epi64(__m256i __X, __m256i __Y) /// A 128-bit vector of [2 x i64] containing the unsigned shift counts (in /// bits). /// \returns A 128-bit vector of [2 x i64] containing the result. -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); |