Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGCall.cpp                |   7
-rw-r--r-- | clang/lib/CodeGen/CGCall.h                  |   6
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.cpp       |   5
-rw-r--r-- | clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 201
4 files changed, 204 insertions, 15 deletions
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index c5371e4..df28641 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2012,13 +2012,6 @@ static void getTrivialDefaultFunctionAttributes(
     FuncAttrs.addAttribute("no-infs-fp-math", "true");
   if (LangOpts.NoHonorNaNs)
     FuncAttrs.addAttribute("no-nans-fp-math", "true");
-  if (LangOpts.AllowFPReassoc && LangOpts.AllowRecip &&
-      LangOpts.NoSignedZero && LangOpts.ApproxFunc &&
-      (LangOpts.getDefaultFPContractMode() ==
-           LangOptions::FPModeKind::FPM_Fast ||
-       LangOpts.getDefaultFPContractMode() ==
-           LangOptions::FPModeKind::FPM_FastHonorPragmas))
-    FuncAttrs.addAttribute("unsafe-fp-math", "true");
   if (CodeGenOpts.SoftFloat)
     FuncAttrs.addAttribute("use-soft-float", "true");
   FuncAttrs.addAttribute("stack-protector-buffer-size",
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index 935b508..1ef8a3f 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -410,10 +410,10 @@ public:
 /// This is useful for adding attrs to bitcode modules that you want to link
 /// with but don't control, such as CUDA's libdevice. When linking with such
 /// a bitcode library, you might want to set e.g. its functions'
-/// "unsafe-fp-math" attribute to match the attr of the functions you're
+/// "denormal-fp-math" attribute to match the attr of the functions you're
 /// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
-/// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
-/// will propagate unsafe-fp-math=false up to every transitive caller of a
+/// denormal-fp-math attrs as tantamount to denormal-fp-math=ieee, and then LLVM
+/// will propagate denormal-fp-math=ieee up to every transitive caller of a
 /// function in the bitcode library!
 ///
 /// With the exception of fast-math attrs, this will only make the attributes
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index acf8de4..8862853 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -183,11 +183,6 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) {
   mergeFnAttrValue("no-infs-fp-math", FPFeatures.getNoHonorInfs());
   mergeFnAttrValue("no-nans-fp-math", FPFeatures.getNoHonorNaNs());
   mergeFnAttrValue("no-signed-zeros-fp-math", FPFeatures.getNoSignedZero());
-  mergeFnAttrValue(
-      "unsafe-fp-math",
-      FPFeatures.getAllowFPReassociate() && FPFeatures.getAllowReciprocal() &&
-          FPFeatures.getAllowApproxFunc() && FPFeatures.getNoSignedZero() &&
-          FPFeatures.allowFPContractAcrossStatement());
 }
 
 CodeGenFunction::CGFPOptionsRAII::~CGFPOptionsRAII() {
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec0..5049a0a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -11,8 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGBuiltin.h"
+#include "CodeGenFunction.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
@@ -181,6 +184,74 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
   return Call;
 }
 
+static llvm::Value *loadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
+                                                 llvm::Value *RsrcPtr) {
+  auto &B = CGF.Builder;
+  auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
+
+  if (RsrcPtr->getType() == VecTy)
+    return RsrcPtr;
+
+  if (RsrcPtr->getType()->isIntegerTy(32)) {
+    llvm::PointerType *VecPtrTy =
+        llvm::PointerType::get(CGF.getLLVMContext(), 8);
+    llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
+    return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
+  }
+
+  if (RsrcPtr->getType()->isPointerTy()) {
+    auto *VecPtrTy = llvm::PointerType::get(
+        CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
+    llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
+    return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
+  }
+
+  const auto &DL = CGF.CGM.getDataLayout();
+  if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
+    return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
+
+  llvm::report_fatal_error("Unexpected texture resource argument form");
+}
+
+llvm::CallInst *
+emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+                                    const clang::CallExpr *E,
+                                    unsigned IntrinsicID, bool IsImageStore) {
+  auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
+    QualType TexQT = CGF.getContext().AMDGPUTextureTy;
+    for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
+      QualType ArgTy = E->getArg(I)->getType();
+      if (ArgTy == TexQT) {
+        return I;
+      }
+
+      if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
+        return I;
+      }
+    }
+
+    return ~0U;
+  };
+
+  clang::SmallVector<llvm::Value *, 10> Args;
+  unsigned RsrcIndex = findTextureDescIndex(E);
+
+  if (RsrcIndex == ~0U) {
+    llvm::report_fatal_error("Invalid argument count for image builtin");
+  }
+
+  for (unsigned I = 0; I < E->getNumArgs(); ++I) {
+    llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
+    if (I == RsrcIndex)
+      V = loadTextureDescPtorAsVec8I32(CGF, V);
+    Args.push_back(V);
+  }
+
+  llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
+  llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
+  return Call;
+}
+
 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
@@ -937,6 +1008,136 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_1d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_1darray, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_2d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_2darray, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_3d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_cube, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
+  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_1d, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_1darray, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_2d, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_2darray, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_3d, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_cube, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
+  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_sample_1d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
+  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_sample_2d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
+  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_sample_3d, false);
+  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+  case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, E, Intrinsic::amdgcn_image_sample_cube, false);
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
     llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
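
Background sketch (not part of the patch): with CodeGen no longer emitting the function-level "unsafe-fp-math" string attribute, the equivalent information is carried per instruction as fast-math flags. A minimal way to query that through LLVM's FPMathOperator is shown below; the helper name hasAllFastMathFlags is illustrative only.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Casting.h"

// True when every fast-math flag (reassoc, nnan, ninf, nsz, arcp, contract,
// afn) is set on this instruction -- roughly what the removed
// "unsafe-fp-math"="true" function attribute used to imply for a whole body.
static bool hasAllFastMathFlags(const llvm::Instruction &I) {
  if (const auto *FPOp = llvm::dyn_cast<llvm::FPMathOperator>(&I))
    return FPOp->isFast();
  return false;
}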
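
Usage sketch for the new image builtins (hypothetical, not taken from the patch): emitAMDGCNImageOverloadedReturnType forwards the builtin arguments unchanged to the overloaded intrinsic, converting only the texture handle to an <8 x i32> descriptor, so a device-side call is assumed to follow the llvm.amdgcn.image.load.2d operand order (dmask, coords, descriptor, texfailctrl, cachepolicy). The __amdgpu_texture_t spelling and the argument order are assumptions here.

// Hypothetical device-side use of one of the new builtins; argument order and
// the __amdgpu_texture_t handle type are assumptions, not taken from this diff.
typedef float float4 __attribute__((ext_vector_type(4)));

float4 load_texel(__amdgpu_texture_t tex, int x, int y) {
  // dmask = 0xf requests all four channels; texfailctrl and cachepolicy are 0.
  return __builtin_amdgcn_image_load_2d_v4f32_i32(/*dmask=*/0xf, x, y, tex,
                                                  /*texfailctrl=*/0,
                                                  /*cachepolicy=*/0);
}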