diff options
Diffstat (limited to 'clang/lib/Headers/mmintrin.h')
-rw-r--r-- | clang/lib/Headers/mmintrin.h | 223 |
1 files changed, 97 insertions, 126 deletions
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h index dc0fa5c..4ed95c5 100644 --- a/clang/lib/Headers/mmintrin.h +++ b/clang/lib/Headers/mmintrin.h @@ -57,6 +57,9 @@ typedef char __v16qi __attribute__((__vector_size__(16))); #define __trunc64(x) \ (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) #define __anyext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, -1, -1) @@ -85,7 +88,7 @@ _mm_empty(void) { /// A 32-bit integer value. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the /// parameter. The upper 32 bits are set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtsi32_si64(int __i) { return __extension__ (__m64)(__v2si){__i, 0}; @@ -102,7 +105,7 @@ _mm_cvtsi32_si64(int __i) /// A 64-bit integer vector. /// \returns A 32-bit signed integer value containing the lower 32 bits of the /// parameter. -static __inline__ int __DEFAULT_FN_ATTRS_SSE2 +static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtsi64_si32(__m64 __m) { return ((__v2si)__m)[0]; @@ -118,10 +121,10 @@ _mm_cvtsi64_si32(__m64 __m) /// A 64-bit signed integer. /// \returns A 64-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtsi64_m64(long long __i) { - return (__m64)__i; + return __extension__ (__m64)(__v1di){__i}; } /// Casts a 64-bit integer vector into a 64-bit signed integer value. @@ -134,10 +137,10 @@ _mm_cvtsi64_m64(long long __i) /// A 64-bit integer vector. /// \returns A 64-bit signed integer containing the same bitwise pattern as the /// parameter. -static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 +static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cvtm64_si64(__m64 __m) { - return (long long)__m; + return ((__v1di)__m)[0]; } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer @@ -239,11 +242,10 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2) /// Bits [63:56] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, - 4, 12, 5, 13, 6, 14, 7, 15); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5, + 13, 6, 14, 7, 15); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -263,11 +265,9 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) /// Bits [63:48] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, - 2, 6, 3, 7); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -285,10 +285,9 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] @@ -312,11 +311,10 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) /// Bits [31:24] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, - 0, 8, 1, 9, 2, 10, 3, 11); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9, + 2, 10, 3, 11); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -336,11 +334,9 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) /// Bits [31:16] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, - 0, 4, 1, 5); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -358,10 +354,9 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); } /// Adds each 8-bit integer element of the first 64-bit integer vector @@ -379,7 +374,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_add_pi8(__m64 __m1, __m64 __m2) { return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2)); @@ -400,7 +395,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_add_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2)); @@ -421,7 +416,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_add_pi32(__m64 __m1, __m64 __m2) { return (__m64)(((__v2su)__m1) + ((__v2su)__m2)); @@ -445,10 +440,9 @@ _mm_add_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_adds_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); } /// Adds, with saturation, each 16-bit signed integer element of the first @@ -469,10 +463,9 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_adds_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); } /// Adds, with saturation, each 8-bit unsigned integer element of the first @@ -492,10 +485,9 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// unsigned sums of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pu8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_adds_pu8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2); } /// Adds, with saturation, each 16-bit unsigned integer element of the first @@ -515,10 +507,9 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// unsigned sums of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_adds_pu16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_adds_pu16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2); } /// Subtracts each 8-bit integer element of the second 64-bit integer @@ -536,7 +527,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_sub_pi8(__m64 __m1, __m64 __m2) { return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2)); @@ -557,7 +548,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_sub_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2)); @@ -578,7 +569,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32] containing the subtrahends. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_sub_pi32(__m64 __m1, __m64 __m2) { return (__m64)(((__v2su)__m1) - ((__v2su)__m2)); @@ -602,10 +593,9 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pi8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_subs_pi8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); } /// Subtracts, with saturation, each 16-bit signed integer element of the @@ -626,10 +616,9 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pi16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_subs_pi16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit unsigned integer element of the second 64-bit @@ -650,10 +639,9 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pu8(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_subs_pu8(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2); } /// Subtracts each 16-bit unsigned integer element of the second 64-bit @@ -674,10 +662,9 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_subs_pu16(__m64 __m1, __m64 __m2) -{ - return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_subs_pu16(__m64 __m1, __m64 __m2) { + return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -723,11 +710,11 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { - return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1), - (__v8hi)__anyext128(__m2))); + return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1), + (__v8hi)__zext128(__m2))); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -745,7 +732,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_mullo_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2)); @@ -791,11 +778,9 @@ _mm_sll_pi16(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_slli_pi16(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count)); } /// Left-shifts each 32-bit signed integer element of the first @@ -838,11 +823,9 @@ _mm_sll_pi32(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. If \a __count is greater or equal to 32, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_slli_pi32(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count)); } /// Left-shifts the first 64-bit integer parameter by the number of bits @@ -880,11 +863,9 @@ _mm_sll_si64(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_slli_si64(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_slli_si64(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -929,11 +910,9 @@ _mm_sra_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srai_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srai_pi16(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -978,11 +957,9 @@ _mm_sra_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srai_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srai_pi32(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -1025,11 +1002,9 @@ _mm_srl_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_pi16(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srli_pi16(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -1072,11 +1047,9 @@ _mm_srl_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_pi32(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srli_pi32(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count)); } /// Right-shifts the first 64-bit integer parameter by the number of bits @@ -1115,11 +1088,9 @@ _mm_srl_si64(__m64 __m, __m64 __count) /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the right-shifted value. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 -_mm_srli_si64(__m64 __m, int __count) -{ - return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m), - __count)); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_srli_si64(__m64 __m, int __count) { + return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count)); } /// Performs a bitwise AND of two 64-bit integer vectors. @@ -1134,7 +1105,7 @@ _mm_srli_si64(__m64 __m, int __count) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_and_si64(__m64 __m1, __m64 __m2) { return (__m64)(((__v1du)__m1) & ((__v1du)__m2)); @@ -1155,7 +1126,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of the second /// parameter and the one's complement of the first parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_andnot_si64(__m64 __m1, __m64 __m2) { return (__m64)(~((__v1du)__m1) & ((__v1du)__m2)); @@ -1173,7 +1144,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise OR of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_or_si64(__m64 __m1, __m64 __m2) { return (__m64)(((__v1du)__m1) | ((__v1du)__m2)); @@ -1191,7 +1162,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_xor_si64(__m64 __m1, __m64 __m2) { return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2)); @@ -1213,7 +1184,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2)); @@ -1235,7 +1206,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2)); @@ -1257,7 +1228,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { return (__m64)(((__v2si)__m1) == ((__v2si)__m2)); @@ -1279,7 +1250,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { /* This function always performs a signed comparison, but __v8qi is a char @@ -1303,7 +1274,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { return (__m64)((__v4hi)__m1 > (__v4hi)__m2); @@ -1325,7 +1296,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { return (__m64)((__v2si)__m1 > (__v2si)__m2); |