Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp                    7
-rw-r--r--  clang/lib/CodeGen/CGCall.h                      6
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.cpp           5
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   201
4 files changed, 204 insertions(+), 15 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index c5371e4..df28641 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2012,13 +2012,6 @@ static void getTrivialDefaultFunctionAttributes(
FuncAttrs.addAttribute("no-infs-fp-math", "true");
if (LangOpts.NoHonorNaNs)
FuncAttrs.addAttribute("no-nans-fp-math", "true");
- if (LangOpts.AllowFPReassoc && LangOpts.AllowRecip &&
- LangOpts.NoSignedZero && LangOpts.ApproxFunc &&
- (LangOpts.getDefaultFPContractMode() ==
- LangOptions::FPModeKind::FPM_Fast ||
- LangOpts.getDefaultFPContractMode() ==
- LangOptions::FPModeKind::FPM_FastHonorPragmas))
- FuncAttrs.addAttribute("unsafe-fp-math", "true");
if (CodeGenOpts.SoftFloat)
FuncAttrs.addAttribute("use-soft-float", "true");
FuncAttrs.addAttribute("stack-protector-buffer-size",
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index 935b508..1ef8a3f 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -410,10 +410,10 @@ public:
/// This is useful for adding attrs to bitcode modules that you want to link
/// with but don't control, such as CUDA's libdevice. When linking with such
/// a bitcode library, you might want to set e.g. its functions'
-/// "unsafe-fp-math" attribute to match the attr of the functions you're
+/// "denormal-fp-math" attribute to match the attr of the functions you're
/// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
-/// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
-/// will propagate unsafe-fp-math=false up to every transitive caller of a
+/// denormal-fp-math attrs as tantamount to denormal-fp-math=ieee, and then LLVM
+/// will propagate denormal-fp-math=ieee up to every transitive caller of a
/// function in the bitcode library!
///
/// With the exception of fast-math attrs, this will only make the attributes
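
The failure mode the comment warns about can be neutralized by stamping the attribute onto the library's functions before linking. A minimal sketch, assuming the bitcode library is already loaded into LibModule; alignDenormalFPMath is an illustrative name, not an existing Clang or LLVM API:

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

// Sketch only: give every defined function in the library the same
// "denormal-fp-math" value as the code being generated, so LLVM does not
// read the missing attribute back as denormal-fp-math=ieee.
static void alignDenormalFPMath(llvm::Module &LibModule, llvm::StringRef Mode) {
  for (llvm::Function &F : LibModule)
    if (!F.isDeclaration() && !F.hasFnAttribute("denormal-fp-math"))
      F.addFnAttr("denormal-fp-math", Mode); // e.g. "preserve-sign,preserve-sign"
}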
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index acf8de4..8862853 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -183,11 +183,6 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) {
mergeFnAttrValue("no-infs-fp-math", FPFeatures.getNoHonorInfs());
mergeFnAttrValue("no-nans-fp-math", FPFeatures.getNoHonorNaNs());
mergeFnAttrValue("no-signed-zeros-fp-math", FPFeatures.getNoSignedZero());
- mergeFnAttrValue(
- "unsafe-fp-math",
- FPFeatures.getAllowFPReassociate() && FPFeatures.getAllowReciprocal() &&
- FPFeatures.getAllowApproxFunc() && FPFeatures.getNoSignedZero() &&
- FPFeatures.allowFPContractAcrossStatement());
}
CodeGenFunction::CGFPOptionsRAII::~CGFPOptionsRAII() {
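
For readers without the surrounding file: mergeFnAttrValue is a lambda defined a few lines above this hunk. A hedged sketch of its likely shape (reconstructed for illustration; consult the actual file for the authoritative definition):

#include "llvm/ADT/StringExtras.h"

// Sketch only: conjoin the pragma-scoped FP option with the attribute value
// already on the current function, updating it only when it actually changes.
auto mergeFnAttrValue = [&](llvm::StringRef Name, bool Value) {
  bool OldValue = CGF.CurFn->getFnAttribute(Name).getValueAsBool();
  bool NewValue = OldValue & Value;
  if (OldValue != NewValue)
    CGF.CurFn->addFnAttr(Name, llvm::toStringRef(NewValue));
};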
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec0..5049a0a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -11,8 +11,11 @@
//===----------------------------------------------------------------------===//
#include "CGBuiltin.h"
+#include "CodeGenFunction.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
@@ -181,6 +184,74 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
return Call;
}
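+// Normalize a texture descriptor argument to the <8 x i32> vector the image
+// intrinsics take: pass a <8 x i32> through unchanged, load the vector when
+// given an i32 handle or a pointer to the descriptor, and bitcast any other
+// 256-bit value directly.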
+static llvm::Value *loadTextureDescPtrAsVec8I32(CodeGenFunction &CGF,
+                                                llvm::Value *RsrcPtr) {
+ auto &B = CGF.Builder;
+ auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
+
+ if (RsrcPtr->getType() == VecTy)
+ return RsrcPtr;
+
+ if (RsrcPtr->getType()->isIntegerTy(32)) {
+ llvm::PointerType *VecPtrTy =
+ llvm::PointerType::get(CGF.getLLVMContext(), 8);
+ llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
+ return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
+ }
+
+ if (RsrcPtr->getType()->isPointerTy()) {
+ auto *VecPtrTy = llvm::PointerType::get(
+ CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
+ llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
+ return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
+ }
+
+ const auto &DL = CGF.CGM.getDataLayout();
+ if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
+ return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
+
+ llvm::report_fatal_error("Unexpected texture resource argument form");
+}
+
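+// Common emitter for the __builtin_amdgcn_image_* builtins: locate the
+// texture descriptor argument by its type, normalize it to <8 x i32>, and
+// emit the overloaded image intrinsic. Store variants return void; load and
+// sample variants are overloaded on the builtin's result type, e.g.
+// __builtin_amdgcn_image_load_2d_v4f32_i32 becomes a call to
+// llvm.amdgcn.image.load.2d returning <4 x float>.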
+static llvm::CallInst *
+emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+                                    const clang::CallExpr *E,
+                                    unsigned IntrinsicID, bool IsImageStore) {
+  auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
+    QualType TexQT = CGF.getContext().AMDGPUTextureTy;
+    for (unsigned I = 0, N = E->getNumArgs(); I != N; ++I) {
+      QualType ArgTy = E->getArg(I)->getType();
+      if (ArgTy.getCanonicalType() == TexQT.getCanonicalType())
+        return I;
+    }
+    return ~0U;
+  };
+
+ clang::SmallVector<llvm::Value *, 10> Args;
+ unsigned RsrcIndex = findTextureDescIndex(E);
+
+  if (RsrcIndex == ~0U)
+    llvm::report_fatal_error(
+        "No texture descriptor argument for image builtin");
+
+ for (unsigned I = 0; I < E->getNumArgs(); ++I) {
+ llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
+ if (I == RsrcIndex)
+      V = loadTextureDescPtrAsVec8I32(CGF, V);
+ Args.push_back(V);
+ }
+
+ llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
+ llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
+ return Call;
+}
+
// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
@@ -937,6 +1008,136 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateInsertElement(I0, A, 1);
}
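+  // Image load/store/sample builtins all funnel through
+  // emitAMDGCNImageOverloadedReturnType; the store cases pass
+  // IsImageStore = true so the intrinsic is emitted with a void return type.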
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_cube, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_1d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_1darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_2d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_2darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_3d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_cube, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_cube, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);