aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/Headers
diff options
context:
space:
mode:
authorFreddy Ye <freddy.ye@intel.com>2024-05-23 10:25:23 +0800
committerGitHub <noreply@github.com>2024-05-23 10:25:23 +0800
commitaa4069ea96e5eb62bc8c7895b9d920f129611b3a (patch)
tree24a42d84b2a2ab7e92ab5e80f20b5c3f5b8aa06b /clang/lib/Headers
parent282d2ab58f56c89510f810a43d4569824a90c538 (diff)
downloadllvm-aa4069ea96e5eb62bc8c7895b9d920f129611b3a.zip
llvm-aa4069ea96e5eb62bc8c7895b9d920f129611b3a.tar.gz
llvm-aa4069ea96e5eb62bc8c7895b9d920f129611b3a.tar.bz2
Revert "[X86] Remove knl/knm specific ISAs supports (#92883)" (#93123)
This reverts commit 282d2ab58f56c89510f810a43d4569824a90c538.
Diffstat (limited to 'clang/lib/Headers')
-rw-r--r--clang/lib/Headers/CMakeLists.txt2
-rw-r--r--clang/lib/Headers/avx512erintrin.h271
-rw-r--r--clang/lib/Headers/avx512pfintrin.h92
-rw-r--r--clang/lib/Headers/immintrin.h8
-rw-r--r--clang/lib/Headers/module.modulemap1
5 files changed, 374 insertions, 0 deletions
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index dbff92b..5f02c71 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -153,10 +153,12 @@ set(x86_files
avx512bwintrin.h
avx512cdintrin.h
avx512dqintrin.h
+ avx512erintrin.h
avx512fintrin.h
avx512fp16intrin.h
avx512ifmaintrin.h
avx512ifmavlintrin.h
+ avx512pfintrin.h
avx512vbmi2intrin.h
avx512vbmiintrin.h
avx512vbmivlintrin.h
diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h
new file mode 100644
index 0000000..1c5a2d2
--- /dev/null
+++ b/clang/lib/Headers/avx512erintrin.h
@@ -0,0 +1,271 @@
+/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512ERINTRIN_H
+#define __AVX512ERINTRIN_H
+
+/* exp2a23 */
+#define _mm512_exp2a23_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_exp2a23_pd(A) \
+ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_pd(S, M, A) \
+ _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_pd(M, A) \
+ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_exp2a23_round_ps(A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
+
+#define _mm512_exp2a23_ps(A) \
+ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_ps(S, M, A) \
+ _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_ps(M, A) \
+ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+/* rsqrt28 */
+#define _mm512_rsqrt28_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_rsqrt28_pd(A) \
+ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_pd(S, M, A) \
+ _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_pd(M, A) \
+ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_round_ps(A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
+
+#define _mm512_rsqrt28_ps(A) \
+ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_ps(S, M, A) \
+ _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_ps(M, A) \
+ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_ss(A, B) \
+ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_ss(S, M, A, B) \
+ _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_ss(M, A, B) \
+ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_sd(A, B) \
+ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_sd(S, M, A, B) \
+ _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_sd(M, A, B) \
+ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+/* rcp28 */
+#define _mm512_rcp28_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rcp28_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_rcp28_pd(A) \
+ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_pd(S, M, A) \
+ _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_pd(M, A) \
+ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_round_ps(A, R) \
+ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
+ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rcp28_round_ps(M, A, R) \
+ ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
+
+#define _mm512_rcp28_ps(A) \
+ _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_ps(S, M, A) \
+ _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_ps(M, A) \
+ _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rcp28_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
+ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
+ ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rcp28_ss(A, B) \
+ _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rcp28_ss(S, M, A, B) \
+ _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rcp28_ss(M, A, B) \
+ _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rcp28_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
+ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
+ ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rcp28_sd(A, B) \
+ _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rcp28_sd(S, M, A, B) \
+ _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rcp28_sd(M, A, B) \
+ _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#endif /* __AVX512ERINTRIN_H */
diff --git a/clang/lib/Headers/avx512pfintrin.h b/clang/lib/Headers/avx512pfintrin.h
new file mode 100644
index 0000000..f853be0
--- /dev/null
+++ b/clang/lib/Headers/avx512pfintrin.h
@@ -0,0 +1,92 @@
+/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512PFINTRIN_H
+#define __AVX512PFINTRIN_H
+
+#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
+ __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
+ (void const *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
+ __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
+ (void const *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
+ __builtin_ia32_gatherpfdps((__mmask16)(mask), \
+ (__v16si)(__m512i)(index), (void const *)(addr), \
+ (int)(scale), (int)(hint))
+
+#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
+ __builtin_ia32_gatherpfdps((__mmask16) -1, \
+ (__v16si)(__m512i)(index), (void const *)(addr), \
+ (int)(scale), (int)(hint))
+
+#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
+ __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (void const *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
+ __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
+ (void const *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
+ __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (void const *)(addr), (int)(scale), (int)(hint))
+
+#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
+ __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
+ (void const *)(addr), (int)(scale), (int)(hint))
+
+#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
+ __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
+ (void *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
+ __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
+ (void *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
+ __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
+ (void *)(addr), (int)(scale), (int)(hint))
+
+#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
+ __builtin_ia32_scatterpfdps((__mmask16)(mask), \
+ (__v16si)(__m512i)(index), (void *)(addr), \
+ (int)(scale), (int)(hint))
+
+#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
+ __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
+ (void *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
+ __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (void *)(addr), (int)(scale), \
+ (int)(hint))
+
+#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
+ __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
+ (void *)(addr), (int)(scale), (int)(hint))
+
+#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
+ __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
+ (void *)(addr), (int)(scale), (int)(hint))
+
+#endif
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index cd6cf09..508696d 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -151,6 +151,10 @@
#include <avx512vldqintrin.h>
#endif
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512ER__)
+#include <avx512erintrin.h>
+#endif
+
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif
@@ -182,6 +186,10 @@
#include <avx512vlvbmi2intrin.h>
#endif
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512PF__)
+#include <avx512pfintrin.h>
+#endif
+
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
#include <avx512fp16intrin.h>
#endif
diff --git a/clang/lib/Headers/module.modulemap b/clang/lib/Headers/module.modulemap
index 9ffc249..4abfd1d 100644
--- a/clang/lib/Headers/module.modulemap
+++ b/clang/lib/Headers/module.modulemap
@@ -44,6 +44,7 @@ module _Builtin_intrinsics [system] [extern_c] {
textual header "avxintrin.h"
textual header "avx2intrin.h"
textual header "avx512fintrin.h"
+ textual header "avx512erintrin.h"
textual header "fmaintrin.h"
header "x86intrin.h"