aboutsummaryrefslogtreecommitdiff
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/BuiltinsNVPTX.td5
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp15
-rw-r--r--clang/test/CodeGen/builtins-nvptx-native-half-type.c41
-rw-r--r--clang/test/CodeGen/builtins-nvptx.c12
4 files changed, 59 insertions, 14 deletions
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index bdbdfa2..f797e29 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -321,6 +321,11 @@ def __nvvm_fabs_ftz_f : NVPTXBuiltin<"float(float)">;
def __nvvm_fabs_f : NVPTXBuiltin<"float(float)">;
def __nvvm_fabs_d : NVPTXBuiltin<"double(double)">;
+def __nvvm_fabs_f16 : NVPTXBuiltinSMAndPTX<"__fp16(__fp16)", SM_53, PTX65>;
+def __nvvm_fabs_f16x2 : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(_Vector<2, __fp16>)", SM_53, PTX65>;
+def __nvvm_fabs_ftz_f16 : NVPTXBuiltinSMAndPTX<"__fp16(__fp16)", SM_53, PTX65>;
+def __nvvm_fabs_ftz_f16x2 : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(_Vector<2, __fp16>)", SM_53, PTX65>;
+
// Round
def __nvvm_round_ftz_f : NVPTXBuiltin<"float(float)">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
index 0f7ab9f..002af4f 100644
--- a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp
@@ -1034,6 +1034,21 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
*this);
+ case NVPTX::BI__nvvm_fabs_f:
+ case NVPTX::BI__nvvm_abs_bf16:
+ case NVPTX::BI__nvvm_abs_bf16x2:
+ case NVPTX::BI__nvvm_fabs_f16:
+ case NVPTX::BI__nvvm_fabs_f16x2:
+ return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs,
+ EmitScalarExpr(E->getArg(0)));
+ case NVPTX::BI__nvvm_fabs_ftz_f:
+ case NVPTX::BI__nvvm_fabs_ftz_f16:
+ case NVPTX::BI__nvvm_fabs_ftz_f16x2:
+ return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz,
+ EmitScalarExpr(E->getArg(0)));
+ case NVPTX::BI__nvvm_fabs_d:
+ return Builder.CreateUnaryIntrinsic(Intrinsic::fabs,
+ EmitScalarExpr(E->getArg(0)));
case NVPTX::BI__nvvm_ldg_h:
case NVPTX::BI__nvvm_ldg_h2:
return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type.c b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
index 5114977..01a004e 100644
--- a/clang/test/CodeGen/builtins-nvptx-native-half-type.c
+++ b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
@@ -26,14 +26,14 @@
// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX72_SM86 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
-// RUN: sm_53 -target-feature +ptx42 -fcuda-is-device -fnative-half-type \
+// RUN: sm_53 -target-feature +ptx65 -fcuda-is-device -fnative-half-type \
// RUN: -emit-llvm -o - -x cuda %s \
-// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s
+// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX65_SM53 %s
// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown \
-// RUN: -target-cpu sm_53 -target-feature +ptx42 -fcuda-is-device \
+// RUN: -target-cpu sm_53 -target-feature +ptx65 -fcuda-is-device \
// RUN: -fnative-half-type -emit-llvm -o - -x cuda %s \
-// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s
+// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX65_SM53 %s
#define __device__ __attribute__((device))
@@ -108,25 +108,25 @@ __device__ void nvvm_fma_f16_f16x2_sm80() {
// CHECK-LABEL: nvvm_fma_f16_f16x2_sm53
__device__ void nvvm_fma_f16_f16x2_sm53() {
#if __CUDA_ARCH__ >= 530
- // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.f16
+ // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.f16
__nvvm_fma_rn_f16(0.1f16, 0.1f16, 0.1f16);
- // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.f16
+ // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.ftz.f16
__nvvm_fma_rn_ftz_f16(0.1f16, 0.1f16, 0.1f16);
- // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.sat.f16
+ // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.sat.f16
__nvvm_fma_rn_sat_f16(0.1f16, 0.1f16, 0.1f16);
- // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.sat.f16
+ // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.ftz.sat.f16
__nvvm_fma_rn_ftz_sat_f16(0.1f16, 0.1f16, 0.1f16);
- // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.f16x2
+ // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.f16x2
__nvvm_fma_rn_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
- // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2
+ // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2
__nvvm_fma_rn_ftz_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
- // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.sat.f16x2
+ // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.sat.f16x2
__nvvm_fma_rn_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
- // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.sat.f16x2
+ // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.sat.f16x2
__nvvm_fma_rn_ftz_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16},
{0.1f16, 0.7f16});
#endif
@@ -173,6 +173,23 @@ __device__ void nvvm_min_max_sm86() {
// CHECK: ret void
}
+// CHECK-LABEL: nvvm_fabs_f16
+__device__ void nvvm_fabs_f16() {
+#if __CUDA_ARCH__ >= 530
+ // CHECK: call half @llvm.nvvm.fabs.f16
+ __nvvm_fabs_f16(0.1f16);
+ // CHECK: call half @llvm.nvvm.fabs.ftz.f16
+ __nvvm_fabs_ftz_f16(0.1f16);
+ // CHECK: call <2 x half> @llvm.nvvm.fabs.v2f16
+ __nvvm_fabs_f16x2({0.1f16, 0.7f16});
+ // CHECK: call <2 x half> @llvm.nvvm.fabs.ftz.v2f16
+ __nvvm_fabs_ftz_f16x2({0.1f16, 0.7f16});
+#endif
+ // CHECK: ret void
+}
+
+
+
typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2)));
// CHECK-LABEL: nvvm_ldg_native_half_types
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index 7404ce0..639c181 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -245,6 +245,14 @@ __device__ void nvvm_math(float f1, float f2, double d1, double d2) {
// CHECK: call double @llvm.nvvm.rcp.rn.d
double td4 = __nvvm_rcp_rn_d(d2);
+// CHECK: call float @llvm.nvvm.fabs.f32
+ float t6 = __nvvm_fabs_f(f1);
+// CHECK: call float @llvm.nvvm.fabs.ftz.f32
+ float t7 = __nvvm_fabs_ftz_f(f2);
+
+// CHECK: call double @llvm.fabs.f64
+ double td5 = __nvvm_fabs_d(d1);
+
// CHECK: call void @llvm.nvvm.membar.cta()
__nvvm_membar_cta();
// CHECK: call void @llvm.nvvm.membar.gl()
@@ -1181,9 +1189,9 @@ __device__ void nvvm_cvt_sm100a_sm101a_sm120a() {
__device__ void nvvm_abs_neg_bf16_bf16x2_sm80() {
#if __CUDA_ARCH__ >= 800
- // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.abs.bf16(bfloat 0xR3DCD)
+ // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.fabs.bf16(bfloat 0xR3DCD)
__nvvm_abs_bf16(BF16);
- // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD))
+ // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.fabs.v2bf16(<2 x bfloat> splat (bfloat 0xR3DCD))
__nvvm_abs_bf16x2(BF16X2);
// CHECK_PTX70_SM80: call bfloat @llvm.nvvm.neg.bf16(bfloat 0xR3DCD)