diff options
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/BuiltinsNVPTX.td | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp | 15 | ||||
-rw-r--r-- | clang/test/CodeGen/builtins-nvptx-native-half-type.c | 41 | ||||
-rw-r--r-- | clang/test/CodeGen/builtins-nvptx.c | 12 |
4 files changed, 59 insertions, 14 deletions
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td index bdbdfa2..f797e29 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.td +++ b/clang/include/clang/Basic/BuiltinsNVPTX.td @@ -321,6 +321,11 @@ def __nvvm_fabs_ftz_f : NVPTXBuiltin<"float(float)">; def __nvvm_fabs_f : NVPTXBuiltin<"float(float)">; def __nvvm_fabs_d : NVPTXBuiltin<"double(double)">; +def __nvvm_fabs_f16 : NVPTXBuiltinSMAndPTX<"__fp16(__fp16)", SM_53, PTX65>; +def __nvvm_fabs_f16x2 : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(_Vector<2, __fp16>)", SM_53, PTX65>; +def __nvvm_fabs_ftz_f16 : NVPTXBuiltinSMAndPTX<"__fp16(__fp16)", SM_53, PTX65>; +def __nvvm_fabs_ftz_f16x2 : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(_Vector<2, __fp16>)", SM_53, PTX65>; + // Round def __nvvm_round_ftz_f : NVPTXBuiltin<"float(float)">; diff --git a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp index 0f7ab9f..002af4f 100644 --- a/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/NVPTX.cpp @@ -1034,6 +1034,21 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E, *this); + case NVPTX::BI__nvvm_fabs_f: + case NVPTX::BI__nvvm_abs_bf16: + case NVPTX::BI__nvvm_abs_bf16x2: + case NVPTX::BI__nvvm_fabs_f16: + case NVPTX::BI__nvvm_fabs_f16x2: + return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, + EmitScalarExpr(E->getArg(0))); + case NVPTX::BI__nvvm_fabs_ftz_f: + case NVPTX::BI__nvvm_fabs_ftz_f16: + case NVPTX::BI__nvvm_fabs_ftz_f16x2: + return Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs_ftz, + EmitScalarExpr(E->getArg(0))); + case NVPTX::BI__nvvm_fabs_d: + return Builder.CreateUnaryIntrinsic(Intrinsic::fabs, + EmitScalarExpr(E->getArg(0))); case NVPTX::BI__nvvm_ldg_h: case NVPTX::BI__nvvm_ldg_h2: return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this); diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type.c b/clang/test/CodeGen/builtins-nvptx-native-half-type.c index 5114977..01a004e 100644 --- a/clang/test/CodeGen/builtins-nvptx-native-half-type.c +++ b/clang/test/CodeGen/builtins-nvptx-native-half-type.c @@ -26,14 +26,14 @@ // RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX72_SM86 %s // RUN: %clang_cc1 -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \ -// RUN: sm_53 -target-feature +ptx42 -fcuda-is-device -fnative-half-type \ +// RUN: sm_53 -target-feature +ptx65 -fcuda-is-device -fnative-half-type \ // RUN: -emit-llvm -o - -x cuda %s \ -// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s +// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX65_SM53 %s // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown \ -// RUN: -target-cpu sm_53 -target-feature +ptx42 -fcuda-is-device \ +// RUN: -target-cpu sm_53 -target-feature +ptx65 -fcuda-is-device \ // RUN: -fnative-half-type -emit-llvm -o - -x cuda %s \ -// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX42_SM53 %s +// RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX65_SM53 %s #define __device__ __attribute__((device)) @@ -108,25 +108,25 @@ __device__ void nvvm_fma_f16_f16x2_sm80() { // CHECK-LABEL: nvvm_fma_f16_f16x2_sm53 __device__ void nvvm_fma_f16_f16x2_sm53() { #if __CUDA_ARCH__ >= 530 - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.f16 + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.f16 __nvvm_fma_rn_f16(0.1f16, 0.1f16, 0.1f16); - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.f16 + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.ftz.f16 __nvvm_fma_rn_ftz_f16(0.1f16, 0.1f16, 0.1f16); - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.sat.f16 + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.sat.f16 __nvvm_fma_rn_sat_f16(0.1f16, 0.1f16, 0.1f16); - // CHECK_PTX42_SM53: call half @llvm.nvvm.fma.rn.ftz.sat.f16 + // CHECK_PTX65_SM53: call half @llvm.nvvm.fma.rn.ftz.sat.f16 __nvvm_fma_rn_ftz_sat_f16(0.1f16, 0.1f16, 0.1f16); - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.f16x2 + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.f16x2 __nvvm_fma_rn_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, {0.1f16, 0.7f16}); - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2 + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.f16x2 __nvvm_fma_rn_ftz_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, {0.1f16, 0.7f16}); - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.sat.f16x2 + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.sat.f16x2 __nvvm_fma_rn_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, {0.1f16, 0.7f16}); - // CHECK_PTX42_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.sat.f16x2 + // CHECK_PTX65_SM53: call <2 x half> @llvm.nvvm.fma.rn.ftz.sat.f16x2 __nvvm_fma_rn_ftz_sat_f16x2({0.1f16, 0.7f16}, {0.1f16, 0.7f16}, {0.1f16, 0.7f16}); #endif @@ -173,6 +173,23 @@ __device__ void nvvm_min_max_sm86() { // CHECK: ret void } +// CHECK-LABEL: nvvm_fabs_f16 +__device__ void nvvm_fabs_f16() { +#if __CUDA_ARCH__ >= 530 + // CHECK: call half @llvm.nvvm.fabs.f16 + __nvvm_fabs_f16(0.1f16); + // CHECK: call half @llvm.nvvm.fabs.ftz.f16 + __nvvm_fabs_ftz_f16(0.1f16); + // CHECK: call <2 x half> @llvm.nvvm.fabs.v2f16 + __nvvm_fabs_f16x2({0.1f16, 0.7f16}); + // CHECK: call <2 x half> @llvm.nvvm.fabs.ftz.v2f16 + __nvvm_fabs_ftz_f16x2({0.1f16, 0.7f16}); +#endif + // CHECK: ret void +} + + + typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2))); // CHECK-LABEL: nvvm_ldg_native_half_types diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 7404ce0..639c181 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -245,6 +245,14 @@ __device__ void nvvm_math(float f1, float f2, double d1, double d2) { // CHECK: call double @llvm.nvvm.rcp.rn.d double td4 = __nvvm_rcp_rn_d(d2); +// CHECK: call float @llvm.nvvm.fabs.f32 + float t6 = __nvvm_fabs_f(f1); +// CHECK: call float @llvm.nvvm.fabs.ftz.f32 + float t7 = __nvvm_fabs_ftz_f(f2); + +// CHECK: call double @llvm.fabs.f64 + double td5 = __nvvm_fabs_d(d1); + // CHECK: call void @llvm.nvvm.membar.cta() __nvvm_membar_cta(); // CHECK: call void @llvm.nvvm.membar.gl() @@ -1181,9 +1189,9 @@ __device__ void nvvm_cvt_sm100a_sm101a_sm120a() { __device__ void nvvm_abs_neg_bf16_bf16x2_sm80() { #if __CUDA_ARCH__ >= 800 - // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.abs.bf16(bfloat 0xR3DCD) + // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.fabs.bf16(bfloat 0xR3DCD) __nvvm_abs_bf16(BF16); - // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD)) + // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.fabs.v2bf16(<2 x bfloat> splat (bfloat 0xR3DCD)) __nvvm_abs_bf16x2(BF16X2); // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.neg.bf16(bfloat 0xR3DCD) |