about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jussi Pakkanen <jpakkane@gmail.com> 2017-02-17 23:01:34 +0200
committer Jussi Pakkanen <jpakkane@gmail.com> 2017-07-17 19:00:42 +0300
commit fc68e0c63ad87f11ce885e9470c6d8e0f2d8f020 (patch)
tree f7e93b0b092fd51c6488a1fbbf115a77c008a9f6
parent 5d731b102bec98449c82a20d893977a0c9594643 (diff)
download meson-fc68e0c63ad87f11ce885e9470c6d8e0f2d8f020.zip
meson-fc68e0c63ad87f11ce885e9470c6d8e0f2d8f020.tar.gz
meson-fc68e0c63ad87f11ce885e9470c6d8e0f2d8f020.tar.bz2
Add SSE4.2 support.
-rw-r--r-- test cases/common/139 simd/meson.build 3
-rw-r--r-- test cases/common/139 simd/simd_sse42.c 26
-rw-r--r-- test cases/common/139 simd/simdchecker.c 6
-rw-r--r-- test cases/common/139 simd/simdfuncs.h 5
4 files changed, 38 insertions, 2 deletions
diff --git a/test cases/common/139 simd/meson.build b/test cases/common/139 simd/meson.build
index 6a4c6c1..7769bfd 100644
--- a/test cases/common/139 simd/meson.build
+++ b/test cases/common/139 simd/meson.build
@@ -17,8 +17,6 @@ cdata = configuration_data()
# and then have a target that uses the result in links_with.
# The following headers need to be added. Also Thumb and Altivec.
-#<nmmintrin.h> SSE4.2
-#<ammintrin.h> SSE4A
#<wmmintrin.h> AES
#<immintrin.h> AVX
#<zmmintrin.h> AVX512
@@ -31,6 +29,7 @@ simdarr = [['-mmmx', 'HAVE_MMX', 'simd_mmx', 'simd_mmx.c'],
['-msse3', 'HAVE_SSE3', 'simd_sse3', 'simd_sse3.c'],
['-mssse3', 'HAVE_SSSE3', 'simd_ssse3', 'simd_ssse3.c'],
['-msse4.1', 'HAVE_SSE41', 'simd_sse41', 'simd_sse41.c'],
+ ['-msse4.2', 'HAVE_SSE42', 'simd_sse42', 'simd_sse42.c'],
]
foreach ia : simdarr
diff --git a/test cases/common/139 simd/simd_sse42.c b/test cases/common/139 simd/simd_sse42.c
new file mode 100644
index 0000000..229ef03
--- /dev/null
+++ b/test cases/common/139 simd/simd_sse42.c
@@ -0,0 +1,26 @@
+#include<simdconfig.h>
+#include<simdfuncs.h>
+
+#include<nmmintrin.h>
+#include<cpuid.h>
+#include<stdint.h>
+
+int sse42_available() { /* Runtime probe: returns nonzero when the running CPU reports SSE4.2. */
+ return __builtin_cpu_supports("sse4.2"); /* Compiler builtin (GCC-style), not a portable C function. */
+}
+
+void increment_sse42(float arr[4]) { /* Adds 1.0 to each of the four elements of arr, in place, via double-precision SSE lanes. */
+ double darr[4]; /* NOTE(review): _mm_store_pd below expects a 16-byte-aligned address; darr has no explicit alignment -- confirm. */
+ __m128d val1 = _mm_set_pd(arr[0], arr[1]); /* _mm_set_pd takes the high lane first: arr[0] is high, arr[1] is low. */
+ __m128d val2 = _mm_set_pd(arr[2], arr[3]); /* Same packing for the second pair. */
+ __m128d one = _mm_set_pd1(1.0); /* 1.0 broadcast to both lanes. */
+ __m128d result = _mm_add_pd(val1, one);
+ _mm_store_pd(darr, result); /* Low lane is written first, so darr[0] = arr[1] + 1 and darr[1] = arr[0] + 1. */
+ result = _mm_add_pd(val2, one);
+ _mm_store_pd(&darr[2], result); /* Likewise darr[2] = arr[3] + 1 and darr[3] = arr[2] + 1. */
+ _mm_crc32_u32(42, 99); /* A no-op, only here to use an SSE4.2 instruction. */
+ arr[0] = (float)darr[1]; /* Swap within each pair to undo the lane reversal introduced by _mm_set_pd. */
+ arr[1] = (float)darr[0];
+ arr[2] = (float)darr[3];
+ arr[3] = (float)darr[2];
+}
diff --git a/test cases/common/139 simd/simdchecker.c b/test cases/common/139 simd/simdchecker.c
index ece1c77..62841b0 100644
--- a/test cases/common/139 simd/simdchecker.c
+++ b/test cases/common/139 simd/simdchecker.c
@@ -17,6 +17,12 @@ int main(int argc, char **argv) {
/* Add here. The first matched one is used so put "better" instruction
* sets at the top.
*/
+#if HAVE_SSE42
+ if(fptr == NULL && sse42_available()) {
+ fptr = increment_sse42;
+ type = "SSE42";
+ }
+#endif
#if HAVE_SSE41
if(fptr == NULL && sse41_available()) {
fptr = increment_sse41;
diff --git a/test cases/common/139 simd/simdfuncs.h b/test cases/common/139 simd/simdfuncs.h
index 67a8e1b..22781bf 100644
--- a/test cases/common/139 simd/simdfuncs.h
+++ b/test cases/common/139 simd/simdfuncs.h
@@ -39,6 +39,11 @@ int sse41_available();
void increment_sse41(float arr[4]);
#endif
+#if HAVE_SSE42
+int sse42_available();
+void increment_sse42(float arr[4]);
+#endif
+
#if HAVE_AVX
int avx_available();
void increment_avx(float arr[4]);