Diffstat (limited to 'clang/lib/Headers/mmintrin.h')
-rw-r--r--  clang/lib/Headers/mmintrin.h  223
1 file changed, 97 insertions(+), 126 deletions(-)
diff --git a/clang/lib/Headers/mmintrin.h b/clang/lib/Headers/mmintrin.h
index dc0fa5c..4ed95c5 100644
--- a/clang/lib/Headers/mmintrin.h
+++ b/clang/lib/Headers/mmintrin.h
@@ -57,6 +57,9 @@ typedef char __v16qi __attribute__((__vector_size__(16)));
#define __trunc64(x) \
(__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
+#define __zext128(x) \
+ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
+ 1, 2, 3)
#define __anyext128(x) \
(__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \
1, -1, -1)
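The two extension helpers differ only in the upper half of the 128-bit result: __zext128 takes lanes 2 and 3 from a zero-initialized second operand, so the upper 64 bits are guaranteed zero, while __anyext128's -1 indices leave them undefined. Zeroed lanes can be folded by Clang's constant evaluator where undefined ones cannot, which appears to be why the shift and multiply wrappers below move from __anyext128 to __zext128. A minimal sketch of the same construction, using hypothetical demo typedefs rather than the header's own:

typedef int v2si_demo __attribute__((__vector_size__(8)));
typedef int v4si_demo __attribute__((__vector_size__(16)));

static v4si_demo widen_zero(v2si_demo x) {
  /* Indices 2 and 3 select from the zero-initialized second operand,
     so the upper two lanes of the result are guaranteed to be zero. */
  return __builtin_shufflevector(x, (v2si_demo){}, 0, 1, 2, 3);
}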
@@ -85,7 +88,7 @@ _mm_empty(void) {
/// A 32-bit integer value.
/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
/// parameter. The upper 32 bits are set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi32_si64(int __i)
{
return __extension__ (__m64)(__v2si){__i, 0};
@@ -102,7 +105,7 @@ _mm_cvtsi32_si64(int __i)
/// A 64-bit integer vector.
/// \returns A 32-bit signed integer value containing the lower 32 bits of the
/// parameter.
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi64_si32(__m64 __m)
{
return ((__v2si)__m)[0];
@@ -118,10 +121,10 @@ _mm_cvtsi64_si32(__m64 __m)
/// A 64-bit signed integer.
/// \returns A 64-bit integer vector containing the same bitwise pattern as the
/// parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtsi64_m64(long long __i)
{
- return (__m64)__i;
+ return __extension__ (__m64)(__v1di){__i};
}
/// Casts a 64-bit integer vector into a 64-bit signed integer value.
@@ -134,10 +137,10 @@ _mm_cvtsi64_m64(long long __i)
/// A 64-bit integer vector.
/// \returns A 64-bit signed integer containing the same bitwise pattern as the
/// parameter.
-static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
+static __inline__ long long __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cvtm64_si64(__m64 __m)
{
- return (long long)__m;
+ return ((__v1di)__m)[0];
}
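Both conversions now build or read an explicit one-element vector instead of bit-casting between long long and __m64 directly; lane initialization and lane subscripting are operations Clang's constant evaluator handles, which appears to be the point of the rewrite. A reference sketch of the round trip, with an illustrative typedef:

typedef long long v1di_demo __attribute__((__vector_size__(8)));

static long long roundtrip(long long i) {
  v1di_demo v = (v1di_demo){i}; /* build the vector one lane at a time */
  return v[0];                  /* read the lane back out              */
}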
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
@@ -239,11 +242,10 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
/// Bits [63:56] are written to bits [63:56] of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
- 4, 12, 5, 13, 6, 14, 7, 15);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5,
+ 13, 6, 14, 7, 15);
}
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -263,11 +265,9 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
/// Bits [63:48] are written to bits [63:48] of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
- 2, 6, 3, 7);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7);
}
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -285,10 +285,9 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
/// the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
@@ -312,11 +311,10 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
/// Bits [31:24] are written to bits [63:56] of the result.
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
- 0, 8, 1, 9, 2, 10, 3, 11);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9,
+ 2, 10, 3, 11);
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -336,11 +334,9 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
/// Bits [31:16] are written to bits [63:48] of the result.
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
- 0, 4, 1, 5);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5);
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -358,10 +354,9 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
/// the upper 32 bits of the result.
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
}
/// Adds each 8-bit integer element of the first 64-bit integer vector
@@ -379,7 +374,7 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
@@ -400,7 +395,7 @@ _mm_add_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
@@ -421,7 +416,7 @@ _mm_add_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_add_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
@@ -445,10 +440,9 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_adds_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
}
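__builtin_elementwise_add_sat clamps each per-lane sum to the limits of the element type instead of wrapping. A scalar reference for one signed 8-bit lane (the _ref name is illustrative, not part of the header):

static signed char adds_i8_ref(signed char a, signed char b) {
  int s = (int)a + (int)b;    /* widen so the true sum cannot overflow */
  if (s > 127)  return 127;   /* clamp to SCHAR_MAX */
  if (s < -128) return -128;  /* clamp to SCHAR_MIN */
  return (signed char)s;
}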
/// Adds, with saturation, each 16-bit signed integer element of the first
@@ -469,10 +463,9 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
/// of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_adds_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
}
/// Adds, with saturation, each 8-bit unsigned integer element of the first
@@ -492,10 +485,9 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pu8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_adds_pu8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
}
/// Adds, with saturation, each 16-bit unsigned integer element of the first
@@ -515,10 +507,9 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// unsigned sums of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_adds_pu16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_adds_pu16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
}
/// Subtracts each 8-bit integer element of the second 64-bit integer
@@ -536,7 +527,7 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
@@ -557,7 +548,7 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
@@ -578,7 +569,7 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
/// both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_sub_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
@@ -602,10 +593,9 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pi8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_subs_pi8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
}
/// Subtracts, with saturation, each 16-bit signed integer element of the
@@ -626,10 +616,9 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pi16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_subs_pi16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
}
/// Subtracts each 8-bit unsigned integer element of the second 64-bit
@@ -650,10 +639,9 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pu8(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_subs_pu8(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
}
/// Subtracts each 16-bit unsigned integer element of the second 64-bit
@@ -674,10 +662,9 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
/// differences of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_subs_pu16(__m64 __m1, __m64 __m2)
-{
- return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_subs_pu16(__m64 __m1, __m64 __m2) {
+ return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
}
/// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -723,11 +710,11 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
{
- return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1),
- (__v8hi)__anyext128(__m2)));
+ return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__zext128(__m1),
+ (__v8hi)__zext128(__m2)));
}
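pmulhw keeps the upper half of the full 32-bit signed product in each lane; with __zext128 the four padding lanes multiply zero by zero, so the truncated result is unaffected. A one-lane scalar reference (illustrative, and assuming the usual arithmetic right shift for negative values):

static short mulhi16_ref(short a, short b) {
  return (short)(((int)a * (int)b) >> 16); /* high 16 bits of the product */
}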
/// Multiplies each 16-bit signed integer element of the first 64-bit
@@ -745,7 +732,7 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
/// of the products of both parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_mullo_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
@@ -791,11 +778,9 @@ _mm_sll_pi16(__m64 __m, __m64 __count)
/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
/// values. If \a __count is greater or equal to 16, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_pi16(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_slli_pi16(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_psllwi128((__v8hi)__zext128(__m), __count));
}
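All of the immediate-shift wrappers follow the same pattern: zero-extend the 64-bit vector to 128 bits, shift with the SSE2 builtin, and truncate back. The per-lane behavior, including the documented collapse to zero for out-of-range counts, reduces to this scalar reference (illustrative):

static unsigned short slli16_ref(unsigned short x, unsigned int count) {
  /* A count of 16 or more shifts every bit out, so the lane becomes 0. */
  return count < 16 ? (unsigned short)(x << count) : 0;
}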
/// Left-shifts each 32-bit signed integer element of the first
@@ -838,11 +823,9 @@ _mm_sll_pi32(__m64 __m, __m64 __count)
/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
/// values. If \a __count is greater or equal to 32, the result is set to all
/// 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_pi32(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_slli_pi32(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_pslldi128((__v4si)__zext128(__m), __count));
}
/// Left-shifts the first 64-bit integer parameter by the number of bits
@@ -880,11 +863,9 @@ _mm_sll_si64(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the left-shifted value. If
/// \a __count is greater or equal to 64, the result is set to 0.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_slli_si64(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_slli_si64(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_psllqi128((__v2di)__zext128(__m), __count));
}
/// Right-shifts each 16-bit integer element of the first parameter,
@@ -929,11 +910,9 @@ _mm_sra_pi16(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srai_pi16(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srai_pi16(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_psrawi128((__v8hi)__zext128(__m), __count));
}
/// Right-shifts each 32-bit integer element of the first parameter,
@@ -978,11 +957,9 @@ _mm_sra_pi32(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srai_pi32(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srai_pi32(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_psradi128((__v4si)__zext128(__m), __count));
}
/// Right-shifts each 16-bit integer element of the first parameter,
@@ -1025,11 +1002,9 @@ _mm_srl_pi16(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_pi16(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srli_pi16(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__zext128(__m), __count));
}
/// Right-shifts each 32-bit integer element of the first parameter,
@@ -1072,11 +1047,9 @@ _mm_srl_pi32(__m64 __m, __m64 __count)
/// A 32-bit integer value.
/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
/// values.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_pi32(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srli_pi32(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_psrldi128((__v4si)__zext128(__m), __count));
}
/// Right-shifts the first 64-bit integer parameter by the number of bits
@@ -1115,11 +1088,9 @@ _mm_srl_si64(__m64 __m, __m64 __count)
/// \param __count
/// A 32-bit integer value.
/// \returns A 64-bit integer vector containing the right-shifted value.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_srli_si64(__m64 __m, int __count)
-{
- return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
- __count));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_srli_si64(__m64 __m, int __count) {
+ return __trunc64(__builtin_ia32_psrlqi128((__v2di)__zext128(__m), __count));
}
/// Performs a bitwise AND of two 64-bit integer vectors.
@@ -1134,7 +1105,7 @@ _mm_srli_si64(__m64 __m, int __count)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_and_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
@@ -1155,7 +1126,7 @@ _mm_and_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise AND of the second
/// parameter and the one's complement of the first parameter.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_andnot_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
@@ -1173,7 +1144,7 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_or_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
@@ -1191,7 +1162,7 @@ _mm_or_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector.
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
/// parameters.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_xor_si64(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
@@ -1213,7 +1184,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
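Clang's vector == and > yield a per-lane mask rather than a single bool: all bits set where the predicate holds, zero elsewhere, matching the pcmpeq/pcmpgt hardware result. One lane of that, as a scalar reference (illustrative):

static signed char cmpeq8_ref(signed char a, signed char b) {
  return a == b ? -1 /* all bits set */ : 0;
}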
@@ -1235,7 +1206,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
@@ -1257,7 +1228,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
@@ -1279,7 +1250,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [8 x i8].
/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
{
/* This function always performs a signed comparison, but __v8qi is a char
@@ -1303,7 +1274,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [4 x i16].
/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
{
return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
@@ -1325,7 +1296,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
/// A 64-bit integer vector of [2 x i32].
/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
/// results.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
{
return (__m64)((__v2si)__m1 > (__v2si)__m2);
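The practical effect of swapping __DEFAULT_FN_ATTRS_SSE2 for __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR is that these functions become usable in C++ constant expressions. A hedged usage sketch, assuming an x86 Clang new enough to carry this patch; the values are arbitrary:

/* Compile as C++ on x86. Each 16-bit lane sums to 0x0005. */
#include <mmintrin.h>

constexpr __m64 a = _mm_cvtsi64_m64(0x0001000200030004LL);
constexpr __m64 b = _mm_cvtsi64_m64(0x0004000300020001LL);
static_assert(_mm_cvtm64_si64(_mm_add_pi16(a, b)) == 0x0005000500050005LL,
              "evaluated entirely at compile time");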