aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jussi Pakkanen <jpakkane@gmail.com> 2017-02-17 22:56:49 +0200
committer: Jussi Pakkanen <jpakkane@gmail.com> 2017-07-17 19:00:42 +0300
commit: 5d731b102bec98449c82a20d893977a0c9594643 (patch)
tree: 8658b261dd371bb07799343a9f4e7caaaf877e64
parent: 52a0d958f173ea96f3ea8a065dd388375d4321c3 (diff)
downloadmeson-5d731b102bec98449c82a20d893977a0c9594643.zip
meson-5d731b102bec98449c82a20d893977a0c9594643.tar.gz
meson-5d731b102bec98449c82a20d893977a0c9594643.tar.bz2
Add SSE4.1 support.
-rw-r--r-- test cases/common/139 simd/meson.build | 2
-rw-r--r-- test cases/common/139 simd/simd_sse41.c | 26
-rw-r--r-- test cases/common/139 simd/simdchecker.c | 6
-rw-r--r-- test cases/common/139 simd/simdfuncs.h | 5
4 files changed, 38 insertions, 1 deletions
diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build
index 6307550..6a4c6c1 100644
--- a/test cases/common/139 simd/meson.build
+++ b/test cases/common/139 simd/meson.build
@@ -17,7 +17,6 @@ cdata = configuration_data()
# and then have a target that uses the result in links_with.
# The following headers need to be added. Also Thumb and Altivec.
-#<smmintrin.h> SSE4.1
#<nmmintrin.h> SSE4.2
#<ammintrin.h> SSE4A
#<wmmintrin.h> AES
@@ -31,6 +30,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'],
['-msse2', 'HAVE_SSE2', 'simd_sse2', 'simd_sse2.c'],
['-msse3', 'HAVE_SSE3', 'simd_sse3', 'simd_sse3.c'],
['-mssse3', 'HAVE_SSSE3', 'simd_ssse3', 'simd_ssse3.c'],
+ ['-msse4.1', 'HAVE_SSE41', 'simd_sse41', 'simd_sse41.c'],
]
foreach ia : simdarr
diff --git a/test cases/common/139 simd/simd_sse41.c b/test cases/common/139 simd/simd_sse41.c
new file mode 100644
index 0000000..6087e40
--- /dev/null
+++ b/test cases/common/139 simd/simd_sse41.c
@@ -0,0 +1,26 @@
+#include<simdconfig.h>
+#include<simdfuncs.h>
+
+#include<smmintrin.h>
+#include<cpuid.h>
+#include<stdint.h>
+
+int sse41_available(void) { /* (void) makes this definition a real prototype */
+    return __builtin_cpu_supports("sse4.1"); /* nonzero iff the running CPU has SSE4.1 */
+}
+
+/* Adds 1.0 to each of the four floats in arr, in place, using double lanes. */
+void increment_sse41(float arr[4]) {
+    double darr[4];
+    __m128d one = _mm_set_pd1(1.0);
+    __m128d val1 = _mm_set_pd(arr[0], arr[1]); /* args are (high, low): lanes land swapped */
+    __m128d val2 = _mm_set_pd(arr[2], arr[3]);
+    /* Ceil is a no-op when the sums are integral; only here to use an SSE4.1 intrinsic. */
+    __m128d result = _mm_ceil_pd(_mm_add_pd(val1, one));
+    _mm_storeu_pd(darr, result); /* unaligned store: darr need not be 16-byte aligned */
+    _mm_storeu_pd(&darr[2], _mm_add_pd(val2, one));
+    arr[0] = (float)darr[1]; /* undo the lane swap from _mm_set_pd */
+    arr[1] = (float)darr[0];
+    arr[2] = (float)darr[3];
+    arr[3] = (float)darr[2];
+}
diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c
index e0722c9..ece1c77 100644
--- a/test cases/common/139 simd/simdchecker.c
+++ b/test cases/common/139 simd/simdchecker.c
@@ -17,6 +17,12 @@ int main(int argc, char **argv) {
/* Add here. The first matched one is used so put "better" instruction
* sets at the top.
*/
+#if HAVE_SSE41
+ if(fptr == NULL && sse41_available()) {
+ fptr = increment_sse41;
+ type = "SSE41";
+ }
+#endif
#if HAVE_SSSE3
if(fptr == NULL && ssse3_available()) {
fptr = increment_ssse3;
diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h
index 9b53181..67a8e1b 100644
--- a/test cases/common/139 simd/simdfuncs.h
+++ b/test cases/common/139 simd/simdfuncs.h
@@ -34,6 +34,11 @@ int ssse3_available();
void increment_ssse3(float arr[4]);
#endif
+#if HAVE_SSE41
+int sse41_available(); /* runtime check: nonzero when the running CPU supports SSE4.1 */
+void increment_sse41(float arr[4]); /* adds 1.0 to each element of arr, in place */
+#endif
+
#if HAVE_AVX
int avx_available();
void increment_avx(float arr[4]);