Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--  clang/lib/CodeGen/CGCall.cpp                   10
-rw-r--r--  clang/lib/CodeGen/CGCall.h                      6
-rw-r--r--  clang/lib/CodeGen/CGDebugInfo.cpp              11
-rw-r--r--  clang/lib/CodeGen/CGExpr.cpp                  129
-rw-r--r--  clang/lib/CodeGen/CGExprCXX.cpp                12
-rw-r--r--  clang/lib/CodeGen/CGExprScalar.cpp              5
-rw-r--r--  clang/lib/CodeGen/CGHLSLBuiltins.cpp           13
-rw-r--r--  clang/lib/CodeGen/CGHLSLRuntime.cpp            16
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp        7
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.cpp           5
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h             7
-rw-r--r--  clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   201
-rw-r--r--  clang/lib/CodeGen/Targets/SPIR.cpp             26
13 files changed, 417 insertions, 31 deletions
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index a931ce4..df28641 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2012,13 +2012,6 @@ static void getTrivialDefaultFunctionAttributes(
FuncAttrs.addAttribute("no-infs-fp-math", "true");
if (LangOpts.NoHonorNaNs)
FuncAttrs.addAttribute("no-nans-fp-math", "true");
- if (LangOpts.AllowFPReassoc && LangOpts.AllowRecip &&
- LangOpts.NoSignedZero && LangOpts.ApproxFunc &&
- (LangOpts.getDefaultFPContractMode() ==
- LangOptions::FPModeKind::FPM_Fast ||
- LangOpts.getDefaultFPContractMode() ==
- LangOptions::FPModeKind::FPM_FastHonorPragmas))
- FuncAttrs.addAttribute("unsafe-fp-math", "true");
if (CodeGenOpts.SoftFloat)
FuncAttrs.addAttribute("use-soft-float", "true");
FuncAttrs.addAttribute("stack-protector-buffer-size",
@@ -3018,8 +3011,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
ArgNo = 0;
if (AddedPotentialArgAccess && MemAttrForPtrArgs) {
- llvm::FunctionType *FunctionType = FunctionType =
- getTypes().GetFunctionType(FI);
+ llvm::FunctionType *FunctionType = getTypes().GetFunctionType(FI);
for (CGFunctionInfo::const_arg_iterator I = FI.arg_begin(),
E = FI.arg_end();
I != E; ++I, ++ArgNo) {
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index 935b508..1ef8a3f 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -410,10 +410,10 @@ public:
/// This is useful for adding attrs to bitcode modules that you want to link
/// with but don't control, such as CUDA's libdevice. When linking with such
/// a bitcode library, you might want to set e.g. its functions'
-/// "unsafe-fp-math" attribute to match the attr of the functions you're
+/// "denormal-fp-math" attribute to match the attr of the functions you're
/// codegen'ing. Otherwise, LLVM will interpret the bitcode module's lack of
-/// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
-/// will propagate unsafe-fp-math=false up to every transitive caller of a
+/// denormal-fp-math attrs as tantamount to denormal-fp-math=ieee, and then LLVM
+/// will propagate denormal-fp-math=ieee up to every transitive caller of a
/// function in the bitcode library!
///
/// With the exception of fast-math attrs, this will only make the attributes
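A minimal sketch of the kind of merging the comment above describes, assuming the llvm::Function string-attribute API (hasFnAttribute/addFnAttr); this is illustrative only, not code from this patch:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"

    // Give every function in a linked bitcode library an explicit
    // "denormal-fp-math" attribute matching the caller's, so LLVM does not
    // treat its absence as "ieee" and propagate that up the call graph.
    static void propagateDenormalMode(llvm::Module &M, llvm::StringRef Mode) {
      for (llvm::Function &F : M)
        if (!F.hasFnAttribute("denormal-fp-math"))
          F.addFnAttr("denormal-fp-math", Mode);
    }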
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index b91cb36..9fe9a13 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -900,10 +900,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
assert((BT->getKind() != BuiltinType::SveCount || Info.NumVectors == 1) &&
"Unsupported number of vectors for svcount_t");
- // Debuggers can't extract 1bit from a vector, so will display a
- // bitpattern for predicates instead.
unsigned NumElems = Info.EC.getKnownMinValue() * Info.NumVectors;
- if (Info.ElementType == CGM.getContext().BoolTy) {
+ llvm::Metadata *BitStride = nullptr;
+ if (BT->getKind() == BuiltinType::SveBool) {
+ Info.ElementType = CGM.getContext().UnsignedCharTy;
+ BitStride = llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned(
+ llvm::Type::getInt64Ty(CGM.getLLVMContext()), 1));
+ } else if (BT->getKind() == BuiltinType::SveCount) {
NumElems /= 8;
Info.ElementType = CGM.getContext().UnsignedCharTy;
}
@@ -929,7 +932,7 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
getOrCreateType(Info.ElementType, TheCU->getFile());
auto Align = getTypeAlignIfRequired(BT, CGM.getContext());
return DBuilder.createVectorType(/*Size*/ 0, Align, ElemTy,
- SubscriptArray);
+ SubscriptArray, BitStride);
}
// It doesn't make sense to generate debug info for PowerPC MMA vector types.
// So we return a safe type here to avoid generating an error.
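For reference, a small function (assuming an SVE-enabled AArch64 target and the ACLE header <arm_sve.h>) whose svbool_t type takes the new SveBool path above, i.e. is described to debuggers as a vector of unsigned char elements with a 1-bit stride:

    #include <arm_sve.h>

    // The debug type for the svbool_t return value now carries a bit stride,
    // so debuggers can display individual predicate bits.
    svbool_t all_true(void) { return svptrue_b8(); }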
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 7dd6a83..e8255b0 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -30,6 +30,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/NSAPI.h"
+#include "clang/AST/ParentMapContext.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
@@ -1353,6 +1354,115 @@ void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, QualType AllocType) {
CB->setMetadata(llvm::LLVMContext::MD_alloc_token, MDN);
}
+namespace {
+/// Infer type from a simple sizeof expression.
+QualType inferTypeFromSizeofExpr(const Expr *E) {
+ const Expr *Arg = E->IgnoreParenImpCasts();
+ if (const auto *UET = dyn_cast<UnaryExprOrTypeTraitExpr>(Arg)) {
+ if (UET->getKind() == UETT_SizeOf) {
+ if (UET->isArgumentType())
+ return UET->getArgumentTypeInfo()->getType();
+ else
+ return UET->getArgumentExpr()->getType();
+ }
+ }
+ return QualType();
+}
+
+/// Infer type from an arithmetic expression involving a sizeof. For example:
+///
+/// malloc(sizeof(MyType) + padding); // infers 'MyType'
+/// malloc(sizeof(MyType) * 32); // infers 'MyType'
+/// malloc(32 * sizeof(MyType)); // infers 'MyType'
+/// malloc(sizeof(MyType) << 1); // infers 'MyType'
+/// ...
+///
+/// More complex arithmetic expressions are supported, but the inference is a
+/// heuristic, e.g. when considering allocations for structs with flexible
+/// array members:
+///
+/// malloc(sizeof(HasFlexArray) + sizeof(int) * 32); // infers 'HasFlexArray'
+///
+QualType inferPossibleTypeFromArithSizeofExpr(const Expr *E) {
+ const Expr *Arg = E->IgnoreParenImpCasts();
+ // The argument is a lone sizeof expression.
+ if (QualType T = inferTypeFromSizeofExpr(Arg); !T.isNull())
+ return T;
+ if (const auto *BO = dyn_cast<BinaryOperator>(Arg)) {
+ // Argument is an arithmetic expression. Cover common arithmetic patterns
+ // involving sizeof.
+ switch (BO->getOpcode()) {
+ case BO_Add:
+ case BO_Div:
+ case BO_Mul:
+ case BO_Shl:
+ case BO_Shr:
+ case BO_Sub:
+ if (QualType T = inferPossibleTypeFromArithSizeofExpr(BO->getLHS());
+ !T.isNull())
+ return T;
+ if (QualType T = inferPossibleTypeFromArithSizeofExpr(BO->getRHS());
+ !T.isNull())
+ return T;
+ break;
+ default:
+ break;
+ }
+ }
+ return QualType();
+}
+
+/// If the expression E is a reference to a variable, infer the type from a
+/// variable's initializer if it contains a sizeof. Beware: this is a heuristic
+/// that ignores whether the variable is later reassigned. For example:
+///
+/// size_t my_size = sizeof(MyType);
+/// void *x = malloc(my_size); // infers 'MyType'
+///
+QualType inferPossibleTypeFromVarInitSizeofExpr(const Expr *E) {
+ const Expr *Arg = E->IgnoreParenImpCasts();
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Arg)) {
+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+ if (const Expr *Init = VD->getInit())
+ return inferPossibleTypeFromArithSizeofExpr(Init);
+ }
+ }
+ return QualType();
+}
+
+/// Deduces the allocated type by checking if the allocation call's result
+/// is immediately used in a cast expression. For example:
+///
+/// MyType *x = (MyType *)malloc(4096); // infers 'MyType'
+///
+QualType inferPossibleTypeFromCastExpr(const CallExpr *CallE,
+ const CastExpr *CastE) {
+ if (!CastE)
+ return QualType();
+ QualType PtrType = CastE->getType();
+ if (PtrType->isPointerType())
+ return PtrType->getPointeeType();
+ return QualType();
+}
+} // end anonymous namespace
+
+void CodeGenFunction::EmitAllocToken(llvm::CallBase *CB, const CallExpr *E) {
+ QualType AllocType;
+ // First check arguments.
+ for (const Expr *Arg : E->arguments()) {
+ AllocType = inferPossibleTypeFromArithSizeofExpr(Arg);
+ if (AllocType.isNull())
+ AllocType = inferPossibleTypeFromVarInitSizeofExpr(Arg);
+ if (!AllocType.isNull())
+ break;
+ }
+ // Then check later casts.
+ if (AllocType.isNull())
+ AllocType = inferPossibleTypeFromCastExpr(E, CurCast);
+ // Emit if we were able to infer the type.
+ if (!AllocType.isNull())
+ EmitAllocToken(CB, AllocType);
+}
+
CodeGenFunction::ComplexPairTy CodeGenFunction::
EmitComplexPrePostIncDec(const UnaryOperator *E, LValue LV,
bool isInc, bool isPre) {
@@ -5723,6 +5833,9 @@ LValue CodeGenFunction::EmitConditionalOperatorLValue(
/// are permitted with aggregate result, including noop aggregate casts, and
/// cast from scalar to union.
LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) {
+ auto RestoreCurCast =
+ llvm::make_scope_exit([this, Prev = CurCast] { CurCast = Prev; });
+ CurCast = E;
switch (E->getCastKind()) {
case CK_ToVoid:
case CK_BitCast:
@@ -6668,16 +6781,24 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType,
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &LocalCallOrInvoke,
E == MustTailCall, E->getExprLoc());
- // Generate function declaration DISuprogram in order to be used
- // in debug info about call sites.
- if (CGDebugInfo *DI = getDebugInfo()) {
- if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
+ if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
+ // Generate the function declaration's DISubprogram so that it can be
+ // referenced by debug info about call sites.
+ if (CGDebugInfo *DI = getDebugInfo()) {
FunctionArgList Args;
QualType ResTy = BuildFunctionArgList(CalleeDecl, Args);
DI->EmitFuncDeclForCallSite(LocalCallOrInvoke,
DI->getFunctionType(CalleeDecl, ResTy, Args),
CalleeDecl);
}
+ if (CalleeDecl->hasAttr<RestrictAttr>() ||
+ CalleeDecl->hasAttr<AllocSizeAttr>()) {
+ // Function has 'malloc' (aka. 'restrict') or 'alloc_size' attribute.
+ if (SanOpts.has(SanitizerKind::AllocToken)) {
+ // Set !alloc_token metadata.
+ EmitAllocToken(LocalCallOrInvoke, E);
+ }
+ }
}
if (CallOrInvoke)
*CallOrInvoke = LocalCallOrInvoke;
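As a sketch, the C patterns below (type names illustrative, assuming the alloc-token sanitizer is enabled) show what the sizeof- and cast-based heuristics above infer:

    #include <stdlib.h>

    struct MyType { int x; };
    struct HasFlexArray { int n; int data[]; };

    void examples(void) {
      void *a = malloc(sizeof(struct MyType));          // lone sizeof: 'struct MyType'
      void *b = malloc(32 * sizeof(struct MyType));     // arithmetic: 'struct MyType'
      void *c = malloc(sizeof(struct HasFlexArray) +
                       sizeof(int) * 32);               // heuristic: 'struct HasFlexArray'
      size_t my_size = sizeof(struct MyType);
      void *d = malloc(my_size);                        // variable init: 'struct MyType'
      struct MyType *e = (struct MyType *)malloc(4096); // cast: 'struct MyType'
      free(a); free(b); free(c); free(d); free(e);
    }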
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 290c2e0..31ac266 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -1371,8 +1371,16 @@ RValue CodeGenFunction::EmitBuiltinNewDeleteCall(const FunctionProtoType *Type,
for (auto *Decl : Ctx.getTranslationUnitDecl()->lookup(Name))
if (auto *FD = dyn_cast<FunctionDecl>(Decl))
- if (Ctx.hasSameType(FD->getType(), QualType(Type, 0)))
- return EmitNewDeleteCall(*this, FD, Type, Args);
+ if (Ctx.hasSameType(FD->getType(), QualType(Type, 0))) {
+ RValue RV = EmitNewDeleteCall(*this, FD, Type, Args);
+ if (auto *CB = dyn_cast_if_present<llvm::CallBase>(RV.getScalarVal())) {
+ if (SanOpts.has(SanitizerKind::AllocToken)) {
+ // Set !alloc_token metadata.
+ EmitAllocToken(CB, TheCall);
+ }
+ }
+ return RV;
+ }
llvm_unreachable("predeclared global operator new/delete is missing");
}
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 06d9d81..715160d 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -33,6 +33,7 @@
#include "clang/Basic/DiagnosticTrap.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APFixedPoint.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -2434,6 +2435,10 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
// have to handle a more broad range of conversions than explicit casts, as they
// handle things like function to ptr-to-function decay etc.
Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
+ auto RestoreCurCast =
+ llvm::make_scope_exit([this, Prev = CGF.CurCast] { CGF.CurCast = Prev; });
+ CGF.CurCast = CE;
+
Expr *E = CE->getSubExpr();
QualType DestTy = CE->getType();
CastKind Kind = CE->getCastKind();
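A minimal sketch of the save/restore idiom used here and in EmitCastLValue, assuming llvm/ADT/ScopeExit.h: nested casts overwrite CurCast, and the scope_exit restores the enclosing value on the way back out, so an allocation call always sees its innermost enclosing cast:

    #include "llvm/ADT/ScopeExit.h"

    struct Visitor {
      const void *CurCast = nullptr; // stand-in for CodeGenFunction::CurCast
      void visitCast(const void *CE) {
        auto Restore =
            llvm::make_scope_exit([this, Prev = CurCast] { CurCast = Prev; });
        CurCast = CE;
        // ... visit subexpressions; nested visitCast calls save and restore ...
      }
    };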
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 6c0fc8d..4f2f5a76 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -352,6 +352,19 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
SmallVector<Value *> Args{OrderID, SpaceOp, RangeOp, IndexOp, Name};
return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
}
+ case Builtin::BI__builtin_hlsl_resource_counterhandlefromimplicitbinding: {
+ Value *MainHandle = EmitScalarExpr(E->getArg(0));
+ if (!CGM.getTriple().isSPIRV())
+ return MainHandle;
+
+ llvm::Type *HandleTy = CGM.getTypes().ConvertType(E->getType());
+ Value *OrderID = EmitScalarExpr(E->getArg(1));
+ Value *SpaceOp = EmitScalarExpr(E->getArg(2));
+ llvm::Intrinsic::ID IntrinsicID =
+ llvm::Intrinsic::spv_resource_counterhandlefromimplicitbinding;
+ SmallVector<Value *> Args{MainHandle, OrderID, SpaceOp};
+ return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args);
+ }
case Builtin::BI__builtin_hlsl_resource_nonuniformindex: {
Value *IndexOp = EmitScalarExpr(E->getArg(0));
llvm::Type *RetTy = ConvertType(E->getType());
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index ede1780..603cef9 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -145,19 +145,29 @@ static CXXMethodDecl *lookupResourceInitMethodAndSetupArgs(
// explicit binding
auto *RegSlot = llvm::ConstantInt::get(CGM.IntTy, Binding.getSlot());
Args.add(RValue::get(RegSlot), AST.UnsignedIntTy);
- CreateMethod = lookupMethod(ResourceDecl, "__createFromBinding", SC_Static);
+ const char *Name = Binding.hasCounterImplicitOrderID()
+ ? "__createFromBindingWithImplicitCounter"
+ : "__createFromBinding";
+ CreateMethod = lookupMethod(ResourceDecl, Name, SC_Static);
} else {
// implicit binding
auto *OrderID =
llvm::ConstantInt::get(CGM.IntTy, Binding.getImplicitOrderID());
Args.add(RValue::get(OrderID), AST.UnsignedIntTy);
- CreateMethod =
- lookupMethod(ResourceDecl, "__createFromImplicitBinding", SC_Static);
+ const char *Name = Binding.hasCounterImplicitOrderID()
+ ? "__createFromImplicitBindingWithImplicitCounter"
+ : "__createFromImplicitBinding";
+ CreateMethod = lookupMethod(ResourceDecl, Name, SC_Static);
}
Args.add(RValue::get(Space), AST.UnsignedIntTy);
Args.add(RValue::get(Range), AST.IntTy);
Args.add(RValue::get(Index), AST.UnsignedIntTy);
Args.add(RValue::get(NameStr), AST.getPointerType(AST.CharTy.withConst()));
+ if (Binding.hasCounterImplicitOrderID()) {
+ uint32_t CounterBinding = Binding.getCounterImplicitOrderID();
+ auto *CounterOrderID = llvm::ConstantInt::get(CGM.IntTy, CounterBinding);
+ Args.add(RValue::get(CounterOrderID), AST.UnsignedIntTy);
+ }
return CreateMethod;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 4272d8b..3613b6a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -869,6 +869,8 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
CGM.getLangOpts().OpenMPOffloadMandatory,
/*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
+ Config.setDefaultTargetAS(
+ CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
OMPBuilder.setConfig(Config);
if (!CGM.getLangOpts().OpenMPIsTargetDevice)
@@ -1243,7 +1245,10 @@ void CGOpenMPRuntimeGPU::emitParallelCall(
llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
if (WFn)
ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
- llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);
+ llvm::Type *FnPtrTy = llvm::PointerType::get(
+ CGF.getLLVMContext(), CGM.getDataLayout().getProgramAddressSpace());
+
+ llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, FnPtrTy);
// Create a private scope that will globalize the arguments
// passed from the outside of the target region.
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index acf8de4..8862853 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -183,11 +183,6 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) {
mergeFnAttrValue("no-infs-fp-math", FPFeatures.getNoHonorInfs());
mergeFnAttrValue("no-nans-fp-math", FPFeatures.getNoHonorNaNs());
mergeFnAttrValue("no-signed-zeros-fp-math", FPFeatures.getNoSignedZero());
- mergeFnAttrValue(
- "unsafe-fp-math",
- FPFeatures.getAllowFPReassociate() && FPFeatures.getAllowReciprocal() &&
- FPFeatures.getAllowApproxFunc() && FPFeatures.getNoSignedZero() &&
- FPFeatures.allowFPContractAcrossStatement());
}
CodeGenFunction::CGFPOptionsRAII::~CGFPOptionsRAII() {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e14e60c..1f0be2d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -346,6 +346,10 @@ public:
QualType FnRetTy;
llvm::Function *CurFn = nullptr;
+ /// While a cast expression is being visited, this holds that cast
+ /// expression; otherwise it is null.
+ const CastExpr *CurCast = nullptr;
+
/// Save Parameter Decl for coroutine.
llvm::SmallVector<const ParmVarDecl *, 4> FnArgs;
@@ -3350,6 +3354,9 @@ public:
/// Emit additional metadata used by the AllocToken instrumentation.
void EmitAllocToken(llvm::CallBase *CB, QualType AllocType);
+ /// Emit additional metadata used by the AllocToken instrumentation,
+ /// inferring the type from an allocation call expression.
+ void EmitAllocToken(llvm::CallBase *CB, const CallExpr *E);
llvm::Value *GetCountedByFieldExprGEP(const Expr *Base, const FieldDecl *FD,
const FieldDecl *CountDecl);
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec0..5049a0a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -11,8 +11,11 @@
//===----------------------------------------------------------------------===//
#include "CGBuiltin.h"
+#include "CodeGenFunction.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/Frontend/FrontendDiagnostic.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
@@ -181,6 +184,74 @@ static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,
return Call;
}
+static llvm::Value *loadTextureDescPtorAsVec8I32(CodeGenFunction &CGF,
+ llvm::Value *RsrcPtr) {
+ auto &B = CGF.Builder;
+ auto *VecTy = llvm::FixedVectorType::get(B.getInt32Ty(), 8);
+
+ if (RsrcPtr->getType() == VecTy)
+ return RsrcPtr;
+
+ if (RsrcPtr->getType()->isIntegerTy(32)) {
+ llvm::PointerType *VecPtrTy =
+ llvm::PointerType::get(CGF.getLLVMContext(), 8);
+ llvm::Value *Ptr = B.CreateIntToPtr(RsrcPtr, VecPtrTy, "tex.rsrc.from.int");
+ return B.CreateAlignedLoad(VecTy, Ptr, llvm::Align(32), "tex.rsrc.val");
+ }
+
+ if (RsrcPtr->getType()->isPointerTy()) {
+ auto *VecPtrTy = llvm::PointerType::get(
+ CGF.getLLVMContext(), RsrcPtr->getType()->getPointerAddressSpace());
+ llvm::Value *Typed = B.CreateBitCast(RsrcPtr, VecPtrTy, "tex.rsrc.typed");
+ return B.CreateAlignedLoad(VecTy, Typed, llvm::Align(32), "tex.rsrc.val");
+ }
+
+ const auto &DL = CGF.CGM.getDataLayout();
+ if (DL.getTypeSizeInBits(RsrcPtr->getType()) == 256)
+ return B.CreateBitCast(RsrcPtr, VecTy, "tex.rsrc.val");
+
+ llvm::report_fatal_error("Unexpected texture resource argument form");
+}
+
+llvm::CallInst *
+emitAMDGCNImageOverloadedReturnType(clang::CodeGen::CodeGenFunction &CGF,
+ const clang::CallExpr *E,
+ unsigned IntrinsicID, bool IsImageStore) {
+ auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
+ QualType TexQT = CGF.getContext().AMDGPUTextureTy;
+ for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
+ QualType ArgTy = E->getArg(I)->getType();
+ if (ArgTy == TexQT) {
+ return I;
+ }
+
+ if (ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
+ return I;
+ }
+ }
+
+ return ~0U;
+ };
+
+ clang::SmallVector<llvm::Value *, 10> Args;
+ unsigned RsrcIndex = findTextureDescIndex(E);
+
+ if (RsrcIndex == ~0U) {
+ llvm::report_fatal_error("Invalid argument count for image builtin");
+ }
+
+ for (unsigned I = 0; I < E->getNumArgs(); ++I) {
+ llvm::Value *V = CGF.EmitScalarExpr(E->getArg(I));
+ if (I == RsrcIndex)
+ V = loadTextureDescPtorAsVec8I32(CGF, V);
+ Args.push_back(V);
+ }
+
+ llvm::Type *RetTy = IsImageStore ? CGF.VoidTy : CGF.ConvertType(E->getType());
+ llvm::CallInst *Call = CGF.Builder.CreateIntrinsic(RetTy, IntrinsicID, Args);
+ return Call;
+}
+
// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
const CallExpr *E,
@@ -937,6 +1008,136 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
return Builder.CreateInsertElement(I0, A, 1);
}
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_cube, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_load_mip_cube, false);
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_1d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_1darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_2d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_2darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_3d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_cube, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_1darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_2darray, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_3d, true);
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
+ case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_store_mip_cube, true);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_1darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_2darray_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_2darray, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_3d, false);
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32:
+ case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32:
+ return emitAMDGCNImageOverloadedReturnType(
+ *this, E, Intrinsic::amdgcn_image_sample_cube, false);
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
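For illustration only, a rough sketch of invoking one of these image builtins from device code. The operand order (dmask, coordinates, texture handle, texfailctrl, cachepolicy) mirrors the llvm.amdgcn.image.* intrinsics, but both the signature and the __amdgpu_texture_t spelling are assumptions here, not documentation:

    typedef float float4 __attribute__((ext_vector_type(4)));

    // Hypothetical call shape: load a v4f32 texel at (x, y) from a 2D image.
    float4 load_texel(__amdgpu_texture_t tex, int x, int y) {
      return __builtin_amdgcn_image_load_2d_v4f32_i32(/*dmask=*/15, x, y, tex,
                                                      /*texfailctrl=*/0,
                                                      /*cachepolicy=*/0);
    }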
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa6314..3f6d4e0 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -61,6 +61,9 @@ public:
QualType SampledType, CodeGenModule &CGM) const;
void
setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
+ llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
+ llvm::PointerType *T,
+ QualType QT) const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
@@ -240,6 +243,29 @@ void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention(
FT, FT->getExtInfo().withCallingConv(CC_SpirFunction));
}
+// LLVM currently assumes a null pointer has the bit pattern 0, but some GPU
+// targets use a non-zero encoding for null in certain address spaces.
+// Because SPIR(-V) is a generic target and the bit pattern of null in
+// non-generic AS is unspecified, materialize null in non-generic AS via an
+// addrspacecast from null in generic AS. This allows later lowering to
+// substitute the target's real sentinel value.
+llvm::Constant *
+CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
+ llvm::PointerType *PT,
+ QualType QT) const {
+ LangAS AS = QT->getUnqualifiedDesugaredType()->isNullPtrType()
+ ? LangAS::Default
+ : QT->getPointeeType().getAddressSpace();
+ if (AS == LangAS::Default || AS == LangAS::opencl_generic)
+ return llvm::ConstantPointerNull::get(PT);
+
+ auto &Ctx = CGM.getContext();
+ auto NPT = llvm::PointerType::get(
+ PT->getContext(), Ctx.getTargetAddressSpace(LangAS::opencl_generic));
+ return llvm::ConstantExpr::getAddrSpaceCast(
+ llvm::ConstantPointerNull::get(NPT), PT);
+}
+
LangAS
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
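A small OpenCL C example (illustrative) of the source pattern this affects; with the change above, the __local null below is materialized as an addrspacecast of the generic null constant instead of a zero bit pattern in the local address space:

    // The bit pattern of null in __local is target-defined, so the constant
    // is emitted as: addrspacecast(generic null -> local).
    kernel void f(void) {
      __local int *p = 0;
      (void)p;
    }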