Diffstat (limited to 'clang/lib')
-rw-r--r--  clang/lib/AST/ASTContext.cpp  73
-rw-r--r--  clang/lib/AST/ByteCode/Interp.cpp  43
-rw-r--r--  clang/lib/AST/ByteCode/InterpBuiltin.cpp  85
-rw-r--r--  clang/lib/AST/DeclTemplate.cpp  67
-rw-r--r--  clang/lib/AST/ExprConstant.cpp  95
-rw-r--r--  clang/lib/Analysis/FlowSensitive/RecordOps.cpp  42
-rw-r--r--  clang/lib/Basic/Targets/AMDGPU.h  7
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp  60
-rw-r--r--  clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h  109
-rw-r--r--  clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp  144
-rw-r--r--  clang/lib/CodeGen/CGExpr.cpp  100
-rw-r--r--  clang/lib/CodeGen/CGExprAgg.cpp  146
-rw-r--r--  clang/lib/CodeGen/CGExprScalar.cpp  51
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp  31
-rw-r--r--  clang/lib/CodeGen/CodeGenFunction.h  6
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.cpp  13
-rw-r--r--  clang/lib/Driver/ToolChains/Clang.cpp  43
-rw-r--r--  clang/lib/Frontend/CompilerInvocation.cpp  53
-rw-r--r--  clang/lib/Headers/avx512fp16intrin.h  3
-rw-r--r--  clang/lib/Headers/avx512vlintrin.h  63
-rw-r--r--  clang/lib/Sema/SemaConcept.cpp  4
-rw-r--r--  clang/lib/Sema/SemaDecl.cpp  11
-rw-r--r--  clang/lib/Sema/SemaHLSL.cpp  6
-rw-r--r--  clang/lib/Sema/SemaInit.cpp  48
-rw-r--r--  clang/lib/Sema/SemaOpenACC.cpp  21
-rw-r--r--  clang/lib/Sema/SemaOverload.cpp  13
-rw-r--r--  clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp  2
-rw-r--r--  clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp  4
28 files changed, 794 insertions, 549 deletions
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 056bfe3..a8b41ba 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -330,76 +330,6 @@ void ASTContext::addComment(const RawComment &RC) {
Comments.addComment(RC, LangOpts.CommentOpts, BumpAlloc);
}
-/// If we have a 'templated' declaration for a template, adjust 'D' to
-/// refer to the actual template.
-/// If we have an implicit instantiation, adjust 'D' to refer to template.
-static const Decl &adjustDeclToTemplate(const Decl &D) {
- if (const auto *FD = dyn_cast<FunctionDecl>(&D)) {
- // Is this function declaration part of a function template?
- if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
- return *FTD;
-
- // Nothing to do if function is not an implicit instantiation.
- if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation)
- return D;
-
- // Function is an implicit instantiation of a function template?
- if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate())
- return *FTD;
-
- // Function is instantiated from a member definition of a class template?
- if (const FunctionDecl *MemberDecl =
- FD->getInstantiatedFromMemberFunction())
- return *MemberDecl;
-
- return D;
- }
- if (const auto *VD = dyn_cast<VarDecl>(&D)) {
- // Static data member is instantiated from a member definition of a class
- // template?
- if (VD->isStaticDataMember())
- if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember())
- return *MemberDecl;
-
- return D;
- }
- if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) {
- // Is this class declaration part of a class template?
- if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate())
- return *CTD;
-
- // Class is an implicit instantiation of a class template or partial
- // specialization?
- if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) {
- if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation)
- return D;
- llvm::PointerUnion<ClassTemplateDecl *,
- ClassTemplatePartialSpecializationDecl *>
- PU = CTSD->getSpecializedTemplateOrPartial();
- return isa<ClassTemplateDecl *>(PU)
- ? *static_cast<const Decl *>(cast<ClassTemplateDecl *>(PU))
- : *static_cast<const Decl *>(
- cast<ClassTemplatePartialSpecializationDecl *>(PU));
- }
-
- // Class is instantiated from a member definition of a class template?
- if (const MemberSpecializationInfo *Info =
- CRD->getMemberSpecializationInfo())
- return *Info->getInstantiatedFrom();
-
- return D;
- }
- if (const auto *ED = dyn_cast<EnumDecl>(&D)) {
- // Enum is instantiated from a member definition of a class template?
- if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum())
- return *MemberDecl;
-
- return D;
- }
- // FIXME: Adjust alias templates?
- return D;
-}
-
const RawComment *ASTContext::getRawCommentForAnyRedecl(
const Decl *D,
const Decl **OriginalDecl) const {
@@ -976,6 +906,9 @@ void ASTContext::cleanup() {
for (const auto &Value : ModuleInitializers)
Value.second->~PerModuleInitializers();
ModuleInitializers.clear();
+
+ XRayFilter.reset();
+ NoSanitizeL.reset();
}
ASTContext::~ASTContext() { cleanup(); }
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index 21af3d6..8904396 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -1638,6 +1638,36 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func,
return true;
}
+static bool GetDynamicDecl(InterpState &S, CodePtr OpPC, Pointer TypePtr,
+ const CXXRecordDecl *&DynamicDecl) {
+ while (TypePtr.isBaseClass())
+ TypePtr = TypePtr.getBase();
+
+ QualType DynamicType = TypePtr.getType();
+ if (TypePtr.isStatic() || TypePtr.isConst()) {
+ const VarDecl *VD = TypePtr.getDeclDesc()->asVarDecl();
+ if (!VD->isConstexpr()) {
+ const Expr *E = S.Current->getExpr(OpPC);
+ APValue V = TypePtr.toAPValue(S.getASTContext());
+ QualType TT = S.getASTContext().getLValueReferenceType(DynamicType);
+ S.FFDiag(E, diag::note_constexpr_polymorphic_unknown_dynamic_type)
+ << AccessKinds::AK_MemberCall << V.getAsString(S.getASTContext(), TT);
+ return false;
+ }
+ }
+
+ if (DynamicType->isPointerType() || DynamicType->isReferenceType()) {
+ DynamicDecl = DynamicType->getPointeeCXXRecordDecl();
+ } else if (DynamicType->isArrayType()) {
+ const Type *ElemType = DynamicType->getPointeeOrArrayElementType();
+ assert(ElemType);
+ DynamicDecl = ElemType->getAsCXXRecordDecl();
+ } else {
+ DynamicDecl = DynamicType->getAsCXXRecordDecl();
+ }
+ return true;
+}
+
bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func,
uint32_t VarArgSize) {
assert(Func->hasThisPointer());
@@ -1662,17 +1692,8 @@ bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func,
}
const CXXRecordDecl *DynamicDecl = nullptr;
- {
- Pointer TypePtr = ThisPtr;
- while (TypePtr.isBaseClass())
- TypePtr = TypePtr.getBase();
-
- QualType DynamicType = TypePtr.getType();
- if (DynamicType->isPointerType() || DynamicType->isReferenceType())
- DynamicDecl = DynamicType->getPointeeCXXRecordDecl();
- else
- DynamicDecl = DynamicType->getAsCXXRecordDecl();
- }
+ if (!GetDynamicDecl(S, OpPC, ThisPtr, DynamicDecl))
+ return false;
assert(DynamicDecl);
const auto *StaticDecl = cast<CXXRecordDecl>(Func->getParentDecl());
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 68ebfdf..6af7ef3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -736,25 +736,6 @@ static bool interp__builtin_expect(InterpState &S, CodePtr OpPC,
return true;
}
-/// rotateleft(value, amount)
-static bool interp__builtin_rotate(InterpState &S, CodePtr OpPC,
- const InterpFrame *Frame,
- const CallExpr *Call, bool Right) {
- APSInt Amount = popToAPSInt(S, Call->getArg(1));
- APSInt Value = popToAPSInt(S, Call->getArg(0));
-
- APSInt Result;
- if (Right)
- Result = APSInt(Value.rotr(Amount.urem(Value.getBitWidth())),
- /*IsUnsigned=*/true);
- else // Left.
- Result = APSInt(Value.rotl(Amount.urem(Value.getBitWidth())),
- /*IsUnsigned=*/true);
-
- pushInteger(S, Result, Call->getType());
- return true;
-}
-
static bool interp__builtin_ffs(InterpState &S, CodePtr OpPC,
const InterpFrame *Frame,
const CallExpr *Call) {
@@ -2916,7 +2897,49 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
});
Dst.initializeAllElements();
+ return true;
+}
+
+static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call, bool MaskZ) {
+ assert(Call->getNumArgs() == 5);
+ APInt U = popToAPSInt(S, Call->getArg(4)); // Lane mask
+ APInt Imm = popToAPSInt(S, Call->getArg(3)); // Ternary truth table
+ const Pointer &C = S.Stk.pop<Pointer>();
+ const Pointer &B = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ unsigned DstLen = A.getNumElems();
+ const QualType ElemQT = getElemType(A);
+ const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+ bool DstUnsigned = ElemQT->isUnsignedIntegerOrEnumerationType();
+
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ for (unsigned I = 0; I != DstLen; ++I) {
+ APInt ALane = A.elem<T>(I).toAPSInt();
+ APInt BLane = B.elem<T>(I).toAPSInt();
+ APInt CLane = C.elem<T>(I).toAPSInt();
+ APInt RLane(LaneWidth, 0);
+ if (U[I]) { // If lane not masked, compute ternary logic.
+ for (unsigned Bit = 0; Bit != LaneWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+ unsigned Idx = (ABit << 2) | (BBit << 1) | (CBit);
+ RLane.setBitVal(Bit, Imm[Idx]);
+ }
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
+ } else if (MaskZ) { // If zero masked, zero the lane.
+ Dst.elem<T>(I) = static_cast<T>(APSInt(RLane, DstUnsigned));
+ } else { // Just masked, put in A lane.
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ALane, DstUnsigned));
+ }
+ }
+ });
+ Dst.initializeAllElements();
return true;
}
@@ -3160,7 +3183,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI_rotl:
case Builtin::BI_lrotl:
case Builtin::BI_rotl64:
- return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/false);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt {
+ return Value.rotl(Amount);
+ });
case Builtin::BI__builtin_rotateright8:
case Builtin::BI__builtin_rotateright16:
@@ -3171,7 +3197,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case Builtin::BI_rotr:
case Builtin::BI_lrotr:
case Builtin::BI_rotr64:
- return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/true);
+ return interp__builtin_elementwise_int_binop(
+ S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt {
+ return Value.rotr(Amount);
+ });
case Builtin::BI__builtin_ffs:
case Builtin::BI__builtin_ffsl:
@@ -3773,6 +3802,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/false);
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ return interp__builtin_ia32_pternlog(S, OpPC, Call, /*MaskZ=*/true);
case Builtin::BI__builtin_elementwise_fshl:
return interp__builtin_elementwise_triop(S, OpPC, Call,
llvm::APIntOps::fshl);
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index b6bb611..e5fba1b 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -1708,3 +1708,70 @@ TemplateParameterList *clang::getReplacedTemplateParameterList(const Decl *D) {
llvm_unreachable("Unhandled templated declaration kind");
}
}
+
+const Decl &clang::adjustDeclToTemplate(const Decl &D) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(&D)) {
+ // Is this function declaration part of a function template?
+ if (const FunctionTemplateDecl *FTD = FD->getDescribedFunctionTemplate())
+ return *FTD;
+
+ // Nothing to do if function is not an implicit instantiation.
+ if (FD->getTemplateSpecializationKind() != TSK_ImplicitInstantiation)
+ return D;
+
+ // Function is an implicit instantiation of a function template?
+ if (const FunctionTemplateDecl *FTD = FD->getPrimaryTemplate())
+ return *FTD;
+
+ // Function is instantiated from a member definition of a class template?
+ if (const FunctionDecl *MemberDecl =
+ FD->getInstantiatedFromMemberFunction())
+ return *MemberDecl;
+
+ return D;
+ }
+ if (const auto *VD = dyn_cast<VarDecl>(&D)) {
+ // Static data member is instantiated from a member definition of a class
+ // template?
+ if (VD->isStaticDataMember())
+ if (const VarDecl *MemberDecl = VD->getInstantiatedFromStaticDataMember())
+ return *MemberDecl;
+
+ return D;
+ }
+ if (const auto *CRD = dyn_cast<CXXRecordDecl>(&D)) {
+ // Is this class declaration part of a class template?
+ if (const ClassTemplateDecl *CTD = CRD->getDescribedClassTemplate())
+ return *CTD;
+
+ // Class is an implicit instantiation of a class template or partial
+ // specialization?
+ if (const auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(CRD)) {
+ if (CTSD->getSpecializationKind() != TSK_ImplicitInstantiation)
+ return D;
+ llvm::PointerUnion<ClassTemplateDecl *,
+ ClassTemplatePartialSpecializationDecl *>
+ PU = CTSD->getSpecializedTemplateOrPartial();
+ return isa<ClassTemplateDecl *>(PU)
+ ? *static_cast<const Decl *>(cast<ClassTemplateDecl *>(PU))
+ : *static_cast<const Decl *>(
+ cast<ClassTemplatePartialSpecializationDecl *>(PU));
+ }
+
+ // Class is instantiated from a member definition of a class template?
+ if (const MemberSpecializationInfo *Info =
+ CRD->getMemberSpecializationInfo())
+ return *Info->getInstantiatedFrom();
+
+ return D;
+ }
+ if (const auto *ED = dyn_cast<EnumDecl>(&D)) {
+ // Enum is instantiated from a member definition of a class template?
+ if (const EnumDecl *MemberDecl = ED->getInstantiatedFromMemberEnum())
+ return *MemberDecl;
+
+ return D;
+ }
+ // FIXME: Adjust alias templates?
+ return D;
+}
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 7bf28d9..618e163 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12168,6 +12168,97 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask: {
+ APValue AValue, BValue, CValue, ImmValue, UValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ if (U[EltNum]) {
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0);
+
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ } else {
+ ResultElements.push_back(APValue(APSInt(ALane, DestUnsigned)));
+ }
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz: {
+ APValue AValue, BValue, CValue, ImmValue, UValue;
+ if (!EvaluateAsRValue(Info, E->getArg(0), AValue) ||
+ !EvaluateAsRValue(Info, E->getArg(1), BValue) ||
+ !EvaluateAsRValue(Info, E->getArg(2), CValue) ||
+ !EvaluateAsRValue(Info, E->getArg(3), ImmValue) ||
+ !EvaluateAsRValue(Info, E->getArg(4), UValue))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType();
+ APInt Imm = ImmValue.getInt();
+ APInt U = UValue.getInt();
+ unsigned ResultLen = AValue.getVectorLength();
+ SmallVector<APValue, 16> ResultElements;
+ ResultElements.reserve(ResultLen);
+
+ for (unsigned EltNum = 0; EltNum < ResultLen; ++EltNum) {
+ APInt ALane = AValue.getVectorElt(EltNum).getInt();
+ APInt BLane = BValue.getVectorElt(EltNum).getInt();
+ APInt CLane = CValue.getVectorElt(EltNum).getInt();
+
+ unsigned BitWidth = ALane.getBitWidth();
+ APInt ResLane(BitWidth, 0);
+
+ if (U[EltNum]) {
+ for (unsigned Bit = 0; Bit < BitWidth; ++Bit) {
+ unsigned ABit = ALane[Bit];
+ unsigned BBit = BLane[Bit];
+ unsigned CBit = CLane[Bit];
+
+ unsigned Idx = (ABit << 2) | (BBit << 1) | CBit;
+ ResLane.setBitVal(Bit, Imm[Idx]);
+ }
+ }
+ ResultElements.push_back(APValue(APSInt(ResLane, DestUnsigned)));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+
case Builtin::BI__builtin_elementwise_clzg:
case Builtin::BI__builtin_elementwise_ctzg: {
APValue SourceLHS;
@@ -14265,7 +14356,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
!EvaluateInteger(E->getArg(1), Amt, Info))
return false;
- return Success(Val.rotl(Amt.urem(Val.getBitWidth())), E);
+ return Success(Val.rotl(Amt), E);
}
case Builtin::BI__builtin_rotateright8:
@@ -14282,7 +14373,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
!EvaluateInteger(E->getArg(1), Amt, Info))
return false;
- return Success(Val.rotr(Amt.urem(Val.getBitWidth())), E);
+ return Success(Val.rotr(Amt), E);
}
case Builtin::BI__builtin_elementwise_add_sat: {
diff --git a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
index ed827ac..03d6ed8 100644
--- a/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
+++ b/clang/lib/Analysis/FlowSensitive/RecordOps.cpp
@@ -14,6 +14,9 @@
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
#define DEBUG_TYPE "dataflow"
@@ -79,18 +82,41 @@ void copyRecord(RecordStorageLocation &Src, RecordStorageLocation &Dst,
if (SrcType == DstType || (SrcDecl != nullptr && DstDecl != nullptr &&
SrcDecl->isDerivedFrom(DstDecl))) {
+ // Dst may have children modeled from derived types other than SrcType, e.g.
+ // after casts of Dst to other types derived from DstType. Only copy the
+ // children and synthetic fields present in both Dst and SrcType.
+ const FieldSet FieldsInSrcType =
+ Env.getDataflowAnalysisContext().getModeledFields(SrcType);
for (auto [Field, DstFieldLoc] : Dst.children())
- copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+ if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+ FieldAsFieldDecl && FieldsInSrcType.contains(FieldAsFieldDecl))
+ copyField(*Field, Src.getChild(*Field), DstFieldLoc, Dst, Env);
+ const llvm::StringMap<QualType> SyntheticFieldsForSrcType =
+ Env.getDataflowAnalysisContext().getSyntheticFields(SrcType);
for (const auto &[Name, DstFieldLoc] : Dst.synthetic_fields())
- copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
- *DstFieldLoc, Env);
+ if (SyntheticFieldsForSrcType.contains(Name))
+ copySyntheticField(DstFieldLoc->getType(), Src.getSyntheticField(Name),
+ *DstFieldLoc, Env);
} else if (SrcDecl != nullptr && DstDecl != nullptr &&
DstDecl->isDerivedFrom(SrcDecl)) {
- for (auto [Field, SrcFieldLoc] : Src.children())
- copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
- for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields())
- copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
- Dst.getSyntheticField(Name), Env);
+ // Src may have children modeled from derived types other than DstType, e.g.
+ // after other casts of Src to those types (likely in different branches,
+ // but without flow-condition-dependent field modeling). Only copy the
+ // children and synthetic fields of Src that are present in DstType.
+ const FieldSet FieldsInDstType =
+ Env.getDataflowAnalysisContext().getModeledFields(DstType);
+ for (auto [Field, SrcFieldLoc] : Src.children()) {
+ if (const auto *FieldAsFieldDecl = dyn_cast<FieldDecl>(Field);
+ FieldAsFieldDecl && FieldsInDstType.contains(FieldAsFieldDecl))
+ copyField(*Field, SrcFieldLoc, Dst.getChild(*Field), Dst, Env);
+ }
+ const llvm::StringMap<QualType> SyntheticFieldsForDstType =
+ Env.getDataflowAnalysisContext().getSyntheticFields(DstType);
+ for (const auto &[Name, SrcFieldLoc] : Src.synthetic_fields()) {
+ if (SyntheticFieldsForDstType.contains(Name))
+ copySyntheticField(SrcFieldLoc->getType(), *SrcFieldLoc,
+ Dst.getSyntheticField(Name), Env);
+ }
} else {
for (const FieldDecl *Field :
Env.getDataflowAnalysisContext().getModeledFields(TypeToCopy)) {
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index 552698a..dfcc7940 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -319,9 +319,12 @@ public:
Opts["__opencl_c_images"] = true;
Opts["__opencl_c_3d_image_writes"] = true;
Opts["cl_khr_3d_image_writes"] = true;
+ Opts["__opencl_c_program_scope_global_variables"] = true;
- Opts["__opencl_c_generic_address_space"] =
- GPUKind >= llvm::AMDGPU::GK_GFX700;
+ if (GPUKind >= llvm::AMDGPU::GK_GFX700) {
+ Opts["__opencl_c_generic_address_space"] = true;
+ Opts["__opencl_c_device_enqueue"] = true;
+ }
}
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index bbc45e5..24a5fc2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -221,10 +221,9 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca(
return initialAlloca;
}
-mlir::Value
-OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
- mlir::Value bound,
- mlir::Location loc, bool inverse) {
+std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop(
+ mlir::Value subscriptedValue, mlir::Value subscriptedValue2,
+ mlir::Value bound, mlir::Location loc, bool inverse) {
mlir::Operation *bodyInsertLoc;
mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy);
@@ -249,7 +248,6 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad,
idxLoad);
-
};
auto forStmtBuilder = [&]() {
@@ -303,6 +301,8 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
if (subscriptedValue)
subscriptedValue = doSubscriptOp(subscriptedValue, load);
+ if (subscriptedValue2)
+ subscriptedValue2 = doSubscriptOp(subscriptedValue2, load);
bodyInsertLoc = builder.createYield(loc);
},
/*stepBuilder=*/
@@ -325,7 +325,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue,
// Leave the insertion point to be inside the body, so we can loop over
// these things.
builder.setInsertionPoint(bodyInsertLoc);
- return subscriptedValue;
+ return {subscriptedValue, subscriptedValue2};
}
mlir::acc::ReductionOperator
@@ -434,7 +434,7 @@ void OpenACCRecipeBuilderBase::createInitRecipe(
mlir::Location loc, mlir::Location locEnd, SourceRange exprRange,
mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl,
- QualType origType) {
+ QualType origType, bool emitInitExpr) {
assert(allocaDecl && "Required recipe variable not set?");
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl};
@@ -464,14 +464,15 @@ void OpenACCRecipeBuilderBase::createInitRecipe(
// initialize this variable correctly.
CIRGenFunction::AutoVarEmission tempDeclEmission =
cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint());
- cgf.emitAutoVarInit(tempDeclEmission);
+ if (emitInitExpr)
+ cgf.emitAutoVarInit(tempDeclEmission);
} else {
mlir::Value alloca = makeBoundsAlloca(
block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes);
// If the initializer is trivial, there is nothing to do here, so save
// ourselves some effort.
- if (allocaDecl->getInit() &&
+ if (emitInitExpr && allocaDecl->getInit() &&
(!cgf.isTrivialInitializer(allocaDecl->getInit()) ||
cgf.getContext().getLangOpts().getTrivialAutoVarInit() !=
LangOptions::TrivialAutoVarInitKind::Uninitialized))
@@ -484,35 +485,42 @@ void OpenACCRecipeBuilderBase::createInitRecipe(
void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy(
mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- CIRGenFunction::AutoVarEmission tempDeclEmission,
- mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe,
- const VarDecl *temporary) {
- mlir::Block *block =
- createRecipeBlock(recipe.getCopyRegion(), mainOp.getType(), loc,
- /*numBounds=*/0, /*isInit=*/false);
- builder.setInsertionPointToEnd(&recipe.getCopyRegion().back());
+ const VarDecl *allocaDecl, const VarDecl *temporary,
+ mlir::Region &copyRegion, size_t numBounds) {
+ mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc,
+ numBounds, /*isInit=*/false);
+ builder.setInsertionPointToEnd(&copyRegion.back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
- mlir::BlockArgument fromArg = block->getArgument(0);
- mlir::BlockArgument toArg = block->getArgument(1);
+ mlir::Value fromArg = block->getArgument(0);
+ mlir::Value toArg = block->getArgument(1);
- mlir::Type elementTy =
- mlir::cast<cir::PointerType>(mainOp.getType()).getPointee();
+ llvm::MutableArrayRef<mlir::BlockArgument> boundsRange =
+ block->getArguments().drop_front(2);
- // Set the address of the emission to be the argument, so that we initialize
- // that instead of the variable in the other block.
- tempDeclEmission.setAllocatedAddress(
- Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)});
+ for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange))
+ std::tie(fromArg, toArg) =
+ createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false);
+
+ // Set up the 'to' address.
+ mlir::Type elementTy =
+ mlir::cast<cir::PointerType>(toArg.getType()).getPointee();
+ CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl);
tempDeclEmission.emittedAsOffload = true;
+ tempDeclEmission.setAllocatedAddress(
+ Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
+ // Set up the 'from' address from the temporary.
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary};
cgf.setAddrOfLocalVar(
temporary,
- Address{fromArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)});
-
+ Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)});
cgf.emitAutoVarInit(tempDeclEmission);
+
+ builder.setInsertionPointToEnd(&copyRegion.back());
mlir::acc::YieldOp::create(builder, locEnd);
}
+
// This function generates the 'combiner' section for a reduction recipe. Note
// that this function is not 'insertion point' clean, in that it alters the
// insertion point to be inside of the 'combiner' section of the recipe, but
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
index 21707ad..a5da744 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
@@ -49,14 +49,16 @@ protected:
// Creates a loop through an 'acc.bounds', leaving the 'insertion' point to be
// the inside of the loop body. Traverses LB->UB UNLESS `inverse` is set.
// Returns the 'subscriptedValue' changed with the new bounds subscript.
+ std::pair<mlir::Value, mlir::Value>
+ createBoundsLoop(mlir::Value subscriptedValue, mlir::Value subscriptedValue2,
+ mlir::Value bound, mlir::Location loc, bool inverse);
+
mlir::Value createBoundsLoop(mlir::Value subscriptedValue, mlir::Value bound,
- mlir::Location loc, bool inverse);
+ mlir::Location loc, bool inverse) {
+ return createBoundsLoop(subscriptedValue, {}, bound, loc, inverse).first;
+ }
+
mlir::acc::ReductionOperator convertReductionOp(OpenACCReductionOperator op);
- void createFirstprivateRecipeCopy(
- mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- CIRGenFunction::AutoVarEmission tempDeclEmission,
- mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe,
- const VarDecl *temporary);
// This function generates the 'combiner' section for a reduction recipe. Note
// that this function is not 'insertion point' clean, in that it alters the
@@ -66,11 +68,19 @@ protected:
mlir::Value mainOp,
mlir::acc::ReductionRecipeOp recipe,
size_t numBounds);
+
void createInitRecipe(mlir::Location loc, mlir::Location locEnd,
SourceRange exprRange, mlir::Value mainOp,
mlir::Region &recipeInitRegion, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes,
- const VarDecl *allocaDecl, QualType origType);
+ const VarDecl *allocaDecl, QualType origType,
+ bool emitInitExpr);
+
+ void createFirstprivateRecipeCopy(mlir::Location loc, mlir::Location locEnd,
+ mlir::Value mainOp,
+ const VarDecl *allocaDecl,
+ const VarDecl *temporary,
+ mlir::Region &copyRegion, size_t numBounds);
void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd,
mlir::Value mainOp, CharUnits alignment,
@@ -150,63 +160,6 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase {
return recipeName;
}
- // Create the 'init' section of the recipe, including the 'copy' section for
- // 'firstprivate'. Note that this function is not 'insertion point' clean, in
- // that it alters the insertion point to be inside of the 'destroy' section of
- // the recipe, but doesn't restore it aftewards.
- void createRecipeInitCopy(mlir::Location loc, mlir::Location locEnd,
- SourceRange exprRange, mlir::Value mainOp,
- RecipeTy recipe, const VarDecl *varRecipe,
- const VarDecl *temporary) {
- // TODO: OpenACC: when we get the 'pointer' variants for
- // firstprivate/reduction, this probably should be removed/split into
- // functions for the BuilderBase.
- assert(varRecipe && "Required recipe variable not set?");
-
- CIRGenFunction::AutoVarEmission tempDeclEmission{
- CIRGenFunction::AutoVarEmission::invalid()};
- CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, varRecipe};
-
- // Do the 'init' section of the recipe IR, which does an alloca, then the
- // initialization (except for firstprivate).
- mlir::Block *block =
- createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc,
- /*numBounds=*/0, /*isInit=*/true);
- builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
- CIRGenFunction::LexicalScope ls(cgf, loc, block);
-
- tempDeclEmission =
- cgf.emitAutoVarAlloca(*varRecipe, builder.saveInsertionPoint());
-
- // 'firstprivate' doesn't do its initialization in the 'init' section,
- // instead it does it in the 'copy' section. SO, only do 'init' here for
- // reduction.
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) {
- // Unlike Private, the recipe here is always required as it has to do
- // init, not just 'default' init.
- if (!varRecipe->getInit())
- cgf.cgm.errorNYI(exprRange, "reduction init recipe");
- cgf.emitAutoVarInit(tempDeclEmission);
- }
-
- mlir::acc::YieldOp::create(builder, locEnd);
-
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) {
- if (!varRecipe->getInit()) {
- // If we don't have any initialization recipe, we failed during Sema to
- // initialize this correctly. If we disable the
- // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll
- // emit an error to tell us. However, emitting those errors during
- // production is a violation of the standard, so we cannot do them.
- cgf.cgm.errorNYI(
- exprRange, "firstprivate copy-init recipe not properly generated");
- }
-
- createFirstprivateRecipeCopy(loc, locEnd, mainOp, tempDeclEmission,
- recipe, varRecipe, temporary);
- }
- }
-
public:
OpenACCRecipeBuilder(CIRGen::CIRGenFunction &cgf,
CIRGen::CIRGenBuilderTy &builder)
@@ -221,19 +174,6 @@ public:
BuiltinType::ArraySection) &&
"array section shouldn't make it to recipe creation");
- // TODO: OpenACC: This is a bit of a hackery to get this to not change for
- // the non-private recipes. This will be removed soon, when we get this
- // 'right' for firstprivate and reduction.
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) {
- if (numBounds) {
- cgf.cgm.errorNYI(varRef->getSourceRange(),
- "firstprivate-init with bounds");
- }
- boundTypes = {};
- numBounds = 0;
- origType = baseType;
- }
-
mlir::ModuleOp mod = builder.getBlock()
->getParent()
->template getParentOfType<mlir::ModuleOp>();
@@ -262,21 +202,20 @@ public:
if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
- origType);
+ origType, /*emitInitExpr=*/true);
} else if constexpr (std::is_same_v<RecipeTy,
mlir::acc::ReductionRecipeOp>) {
createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
- origType);
+ origType, /*emitInitExpr=*/true);
createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds);
} else {
static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>);
- // TODO: OpenACC: we probably want this to call createInitRecipe as well,
- // but do so in a way that omits the 'initialization', so that we can do
- // it separately, since it belongs in the 'copy' region. It also might
- // need a way of getting the tempDeclEmission out of it for that purpose.
- createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp,
- recipe, varRecipe, temporary);
+ createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
+ recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
+ origType, /*emitInitExpr=*/false);
+ createFirstprivateRecipeCopy(loc, locEnd, mainOp, varRecipe, temporary,
+ recipe.getCopyRegion(), numBounds);
}
if (origType.isDestructedType())
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index c15637d..2eeef81 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -8,18 +8,39 @@
#include "PassDetail.h"
#include "clang/AST/ASTContext.h"
+#include "clang/Basic/Module.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include "clang/CIR/Dialect/Passes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/Path.h"
#include <memory>
using namespace mlir;
using namespace cir;
+static SmallString<128> getTransformedFileName(mlir::ModuleOp mlirModule) {
+ SmallString<128> fileName;
+
+ if (mlirModule.getSymName())
+ fileName = llvm::sys::path::filename(mlirModule.getSymName()->str());
+
+ if (fileName.empty())
+ fileName = "<null>";
+
+ for (size_t i = 0; i < fileName.size(); ++i) {
+ // Replace everything that's not [a-zA-Z0-9._] with a _. This set happens
+ // to be the set of C preprocessing numbers.
+ if (!clang::isPreprocessingNumberBody(fileName[i]))
+ fileName[i] = '_';
+ }
+
+ return fileName;
+}
+
namespace {
struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
LoweringPreparePass() = default;
@@ -30,9 +51,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
void lowerComplexDivOp(cir::ComplexDivOp op);
void lowerComplexMulOp(cir::ComplexMulOp op);
void lowerUnaryOp(cir::UnaryOp op);
+ void lowerGlobalOp(cir::GlobalOp op);
void lowerArrayDtor(cir::ArrayDtor op);
void lowerArrayCtor(cir::ArrayCtor op);
+ /// Build the function that initializes the specified global
+ cir::FuncOp buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op);
+
+ /// Build a module init function that calls all the dynamic initializers.
+ void buildCXXGlobalInitFunc();
+
cir::FuncOp buildRuntimeFunction(
mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
cir::FuncType type,
@@ -47,6 +75,10 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
/// Tracks current module.
mlir::ModuleOp mlirModule;
+ /// Tracks existing dynamic initializers.
+ llvm::StringMap<uint32_t> dynamicInitializerNames;
+ llvm::SmallVector<cir::FuncOp> dynamicInitializers;
+
void setASTContext(clang::ASTContext *c) { astCtx = c; }
};
@@ -589,6 +621,111 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
op.erase();
}
+cir::FuncOp
+LoweringPreparePass::buildCXXGlobalVarDeclInitFunc(cir::GlobalOp op) {
+ // TODO(cir): Store this in the GlobalOp.
+ // This should come from the MangleContext, but for now I'm hardcoding it.
+ SmallString<256> fnName("__cxx_global_var_init");
+ // Get a unique name
+ uint32_t cnt = dynamicInitializerNames[fnName]++;
+ if (cnt)
+ fnName += "." + llvm::Twine(cnt).str();
+
+ // Create a variable initialization function.
+ CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointAfter(op);
+ auto fnType = cir::FuncType::get({}, builder.getVoidTy());
+ FuncOp f = buildRuntimeFunction(builder, fnName, op.getLoc(), fnType,
+ cir::GlobalLinkageKind::InternalLinkage);
+
+ // Move over the initialization code of the ctor region.
+ mlir::Block *entryBB = f.addEntryBlock();
+ if (!op.getCtorRegion().empty()) {
+ mlir::Block &block = op.getCtorRegion().front();
+ entryBB->getOperations().splice(entryBB->begin(), block.getOperations(),
+ block.begin(), std::prev(block.end()));
+ }
+
+ // Register the destructor call with __cxa_atexit
+ mlir::Region &dtorRegion = op.getDtorRegion();
+ if (!dtorRegion.empty()) {
+ assert(!cir::MissingFeatures::opGlobalDtorLowering());
+ llvm_unreachable("dtor region lowering is NYI");
+ }
+
+ // Replace cir.yield with cir.return
+ builder.setInsertionPointToEnd(entryBB);
+ mlir::Operation *yieldOp = nullptr;
+ if (!op.getCtorRegion().empty()) {
+ mlir::Block &block = op.getCtorRegion().front();
+ yieldOp = &block.getOperations().back();
+ } else {
+ assert(!cir::MissingFeatures::opGlobalDtorLowering());
+ llvm_unreachable("dtor region lowering is NYI");
+ }
+
+ assert(isa<YieldOp>(*yieldOp));
+ cir::ReturnOp::create(builder, yieldOp->getLoc());
+ return f;
+}
+
+void LoweringPreparePass::lowerGlobalOp(GlobalOp op) {
+ mlir::Region &ctorRegion = op.getCtorRegion();
+ mlir::Region &dtorRegion = op.getDtorRegion();
+
+ if (!ctorRegion.empty() || !dtorRegion.empty()) {
+ // Build a variable initialization function and move the initialization code
+ // in the ctor region over.
+ cir::FuncOp f = buildCXXGlobalVarDeclInitFunc(op);
+
+ // Clear the ctor and dtor region
+ ctorRegion.getBlocks().clear();
+ dtorRegion.getBlocks().clear();
+
+ assert(!cir::MissingFeatures::astVarDeclInterface());
+ dynamicInitializers.push_back(f);
+ }
+
+ assert(!cir::MissingFeatures::opGlobalAnnotations());
+}
+
+void LoweringPreparePass::buildCXXGlobalInitFunc() {
+ if (dynamicInitializers.empty())
+ return;
+
+ assert(!cir::MissingFeatures::opGlobalCtorList());
+
+ SmallString<256> fnName;
+ // Include the filename in the symbol name. Including "sub_" matches gcc
+ // and makes sure these symbols appear lexicographically behind the symbols
+ // with priority (TBD). Module implementation units behave the same
+ // way as a non-modular TU with imports.
+ // TODO: check CXX20ModuleInits
+ if (astCtx->getCurrentNamedModule() &&
+ !astCtx->getCurrentNamedModule()->isModuleImplementation()) {
+ llvm::raw_svector_ostream out(fnName);
+ std::unique_ptr<clang::MangleContext> mangleCtx(
+ astCtx->createMangleContext());
+ cast<clang::ItaniumMangleContext>(*mangleCtx)
+ .mangleModuleInitializer(astCtx->getCurrentNamedModule(), out);
+ } else {
+ fnName += "_GLOBAL__sub_I_";
+ fnName += getTransformedFileName(mlirModule);
+ }
+
+ CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointToEnd(&mlirModule.getBodyRegion().back());
+ auto fnType = cir::FuncType::get({}, builder.getVoidTy());
+ cir::FuncOp f =
+ buildRuntimeFunction(builder, fnName, mlirModule.getLoc(), fnType,
+ cir::GlobalLinkageKind::ExternalLinkage);
+ builder.setInsertionPointToStart(f.addEntryBlock());
+ for (cir::FuncOp &f : dynamicInitializers)
+ builder.createCallOp(f.getLoc(), f, {});
+
+ cir::ReturnOp::create(builder, f.getLoc());
+}
+
static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
clang::ASTContext *astCtx,
mlir::Operation *op, mlir::Type eltTy,
@@ -691,6 +828,8 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) {
lowerComplexDivOp(complexDiv);
else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op))
lowerComplexMulOp(complexMul);
+ else if (auto glob = mlir::dyn_cast<cir::GlobalOp>(op))
+ lowerGlobalOp(glob);
else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op))
lowerUnaryOp(unary);
}
@@ -704,12 +843,15 @@ void LoweringPreparePass::runOnOperation() {
op->walk([&](mlir::Operation *op) {
if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp,
- cir::ComplexMulOp, cir::ComplexDivOp, cir::UnaryOp>(op))
+ cir::ComplexMulOp, cir::ComplexDivOp, cir::GlobalOp,
+ cir::UnaryOp>(op))
opsToTransform.push_back(op);
});
for (mlir::Operation *o : opsToTransform)
runOnOp(o);
+
+ buildCXXGlobalInitFunc();
}
std::unique_ptr<Pass> mlir::createLoweringPreparePass() {
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e6e4947..9f30287 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -6784,29 +6784,26 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) {
return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV;
}
-void CodeGenFunction::FlattenAccessAndType(
- Address Addr, QualType AddrType,
- SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList,
- SmallVectorImpl<QualType> &FlatTypes) {
- // WorkList is list of type we are processing + the Index List to access
- // the field of that type in Addr for use in a GEP
- llvm::SmallVector<std::pair<QualType, llvm::SmallVector<llvm::Value *, 4>>,
- 16>
+void CodeGenFunction::FlattenAccessAndTypeLValue(
+ LValue Val, SmallVectorImpl<LValue> &AccessList) {
+
+ llvm::SmallVector<
+ std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16>
WorkList;
llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32);
- // Addr should be a pointer so we need to 'dereference' it
- WorkList.push_back({AddrType, {llvm::ConstantInt::get(IdxTy, 0)}});
+ WorkList.push_back({Val, Val.getType(), {llvm::ConstantInt::get(IdxTy, 0)}});
while (!WorkList.empty()) {
- auto [T, IdxList] = WorkList.pop_back_val();
+ auto [LVal, T, IdxList] = WorkList.pop_back_val();
T = T.getCanonicalType().getUnqualifiedType();
assert(!isa<MatrixType>(T) && "Matrix types not yet supported in HLSL");
+
if (const auto *CAT = dyn_cast<ConstantArrayType>(T)) {
uint64_t Size = CAT->getZExtSize();
for (int64_t I = Size - 1; I > -1; I--) {
llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I));
- WorkList.emplace_back(CAT->getElementType(), IdxListCopy);
+ WorkList.emplace_back(LVal, CAT->getElementType(), IdxListCopy);
}
} else if (const auto *RT = dyn_cast<RecordType>(T)) {
const RecordDecl *Record = RT->getOriginalDecl()->getDefinitionOrSelf();
@@ -6814,44 +6811,75 @@ void CodeGenFunction::FlattenAccessAndType(
const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(Record);
- llvm::SmallVector<QualType, 16> FieldTypes;
+ llvm::SmallVector<
+ std::tuple<LValue, QualType, llvm::SmallVector<llvm::Value *, 4>>, 16>
+ ReverseList;
if (CXXD && CXXD->isStandardLayout())
Record = CXXD->getStandardLayoutBaseWithFields();
// deal with potential base classes
if (CXXD && !CXXD->isStandardLayout()) {
- for (auto &Base : CXXD->bases())
- FieldTypes.push_back(Base.getType());
+ if (CXXD->getNumBases() > 0) {
+ assert(CXXD->getNumBases() == 1 &&
+ "HLSL doesn't support multiple inheritance.");
+ auto Base = CXXD->bases_begin();
+ llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
+ IdxListCopy.push_back(llvm::ConstantInt::get(
+ IdxTy, 0)); // base struct should be at index zero
+ ReverseList.emplace_back(LVal, Base->getType(), IdxListCopy);
+ }
}
- for (auto *FD : Record->fields())
- FieldTypes.push_back(FD->getType());
+ const CGRecordLayout &Layout = CGM.getTypes().getCGRecordLayout(Record);
- for (int64_t I = FieldTypes.size() - 1; I > -1; I--) {
- llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
- IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I));
- WorkList.insert(WorkList.end(), {FieldTypes[I], IdxListCopy});
+ llvm::Type *LLVMT = ConvertTypeForMem(T);
+ CharUnits Align = getContext().getTypeAlignInChars(T);
+ LValue RLValue;
+ bool createdGEP = false;
+ for (auto *FD : Record->fields()) {
+ if (FD->isBitField()) {
+ if (FD->isUnnamedBitField())
+ continue;
+ if (!createdGEP) {
+ createdGEP = true;
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList,
+ LLVMT, Align, "gep");
+ RLValue = MakeAddrLValue(GEP, T);
+ }
+ LValue FieldLVal = EmitLValueForField(RLValue, FD, true);
+ ReverseList.push_back({FieldLVal, FD->getType(), {}});
+ } else {
+ llvm::SmallVector<llvm::Value *, 4> IdxListCopy = IdxList;
+ IdxListCopy.push_back(
+ llvm::ConstantInt::get(IdxTy, Layout.getLLVMFieldNo(FD)));
+ ReverseList.emplace_back(LVal, FD->getType(), IdxListCopy);
+ }
}
+
+ std::reverse(ReverseList.begin(), ReverseList.end());
+ llvm::append_range(WorkList, ReverseList);
} else if (const auto *VT = dyn_cast<VectorType>(T)) {
llvm::Type *LLVMT = ConvertTypeForMem(T);
CharUnits Align = getContext().getTypeAlignInChars(T);
- Address GEP =
- Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep");
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList, LLVMT,
+ Align, "vector.gep");
+ LValue Base = MakeAddrLValue(GEP, T);
for (unsigned I = 0, E = VT->getNumElements(); I < E; I++) {
- llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I);
- // gep on vector fields is not recommended so combine gep with
- // extract/insert
- AccessList.emplace_back(GEP, Idx);
- FlatTypes.push_back(VT->getElementType());
+ llvm::Constant *Idx = llvm::ConstantInt::get(IdxTy, I);
+ LValue LV =
+ LValue::MakeVectorElt(Base.getAddress(), Idx, VT->getElementType(),
+ Base.getBaseInfo(), TBAAAccessInfo());
+ AccessList.emplace_back(LV);
}
- } else {
- // a scalar/builtin type
- llvm::Type *LLVMT = ConvertTypeForMem(T);
- CharUnits Align = getContext().getTypeAlignInChars(T);
- Address GEP =
- Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep");
- AccessList.emplace_back(GEP, nullptr);
- FlatTypes.push_back(T);
+ } else { // a scalar/builtin type
+ if (!IdxList.empty()) {
+ llvm::Type *LLVMT = ConvertTypeForMem(T);
+ CharUnits Align = getContext().getTypeAlignInChars(T);
+ Address GEP = Builder.CreateInBoundsGEP(LVal.getAddress(), IdxList,
+ LLVMT, Align, "gep");
+ AccessList.emplace_back(MakeAddrLValue(GEP, T));
+ } else // must be a bitfield we already created an lvalue for
+ AccessList.emplace_back(LVal);
}
}
}
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index b8150a2..07b9aeb 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -488,100 +488,62 @@ static bool isTrivialFiller(Expr *E) {
return false;
}
-static void EmitHLSLAggregateSplatCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, llvm::Value *SrcVal,
- QualType SrcTy, SourceLocation Loc) {
+// emit an elementwise cast where the RHS is a scalar or vector
+// or emit an aggregate splat cast
+static void EmitHLSLScalarElementwiseAndSplatCasts(CodeGenFunction &CGF,
+ LValue DestVal,
+ llvm::Value *SrcVal,
+ QualType SrcTy,
+ SourceLocation Loc) {
// Flatten our destination
- SmallVector<QualType> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
-
- assert(SrcTy->isScalarType() && "Invalid HLSL Aggregate splat cast.");
- for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; ++I) {
- llvm::Value *Cast =
- CGF.EmitScalarConversion(SrcVal, SrcTy, DestTypes[I], Loc);
-
- // store back
- llvm::Value *Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
- }
-}
-
-// emit a flat cast where the RHS is a scalar, including vector
-static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, llvm::Value *SrcVal,
- QualType SrcTy, SourceLocation Loc) {
- // Flatten our destination
- SmallVector<QualType, 16> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
-
- assert(SrcTy->isVectorType() && "HLSL Flat cast doesn't handle splatting.");
- const VectorType *VT = SrcTy->getAs<VectorType>();
- SrcTy = VT->getElementType();
- assert(StoreGEPList.size() <= VT->getNumElements() &&
- "Cannot perform HLSL flat cast when vector source \
- object has less elements than flattened destination \
- object.");
- for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; I++) {
- llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load");
+ SmallVector<LValue, 16> StoreList;
+ CGF.FlattenAccessAndTypeLValue(DestVal, StoreList);
+
+ bool isVector = false;
+ if (auto *VT = SrcTy->getAs<VectorType>()) {
+ isVector = true;
+ SrcTy = VT->getElementType();
+ assert(StoreList.size() <= VT->getNumElements() &&
+ "Cannot perform HLSL flat cast when vector source \
+ object has less elements than flattened destination \
+ object.");
+ }
+
+ for (unsigned I = 0, Size = StoreList.size(); I < Size; I++) {
+ LValue DestLVal = StoreList[I];
+ llvm::Value *Load =
+ isVector ? CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load")
+ : SrcVal;
llvm::Value *Cast =
- CGF.EmitScalarConversion(Load, SrcTy, DestTypes[I], Loc);
-
- // store back
- llvm::Value *Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+ CGF.EmitScalarConversion(Load, SrcTy, DestLVal.getType(), Loc);
+ CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal);
}
}
// emit a flat cast where the RHS is an aggregate
-static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address DestVal,
- QualType DestTy, Address SrcVal,
- QualType SrcTy, SourceLocation Loc) {
+static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue DestVal,
+ LValue SrcVal, SourceLocation Loc) {
// Flatten our destination
- SmallVector<QualType, 16> DestTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> StoreGEPList;
- // ^^ Flattened accesses to DestVal we want to store into
- CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes);
+ SmallVector<LValue, 16> StoreList;
+ CGF.FlattenAccessAndTypeLValue(DestVal, StoreList);
// Flatten our src
- SmallVector<QualType, 16> SrcTypes; // Flattened type
- SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
- // ^^ Flattened accesses to SrcVal we want to load from
- CGF.FlattenAccessAndType(SrcVal, SrcTy, LoadGEPList, SrcTypes);
+ SmallVector<LValue, 16> LoadList;
+ CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList);
- assert(StoreGEPList.size() <= LoadGEPList.size() &&
- "Cannot perform HLSL flat cast when flattened source object \
+ assert(StoreList.size() <= LoadList.size() &&
+ "Cannot perform HLSL elementwise cast when flattened source object \
has less elements than flattened destination object.");
- // apply casts to what we load from LoadGEPList
+ // apply casts to what we load from LoadList
// and store result in Dest
- for (unsigned I = 0, E = StoreGEPList.size(); I < E; I++) {
- llvm::Value *Idx = LoadGEPList[I].second;
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
- Load =
- Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
- llvm::Value *Cast =
- CGF.EmitScalarConversion(Load, SrcTypes[I], DestTypes[I], Loc);
-
- // store back
- Idx = StoreGEPList[I].second;
- if (Idx) {
- llvm::Value *V =
- CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert");
- Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx);
- }
- CGF.Builder.CreateStore(Cast, StoreGEPList[I].first);
+ for (unsigned I = 0, E = StoreList.size(); I < E; I++) {
+ LValue DestLVal = StoreList[I];
+ LValue SrcLVal = LoadList[I];
+ RValue RVal = CGF.EmitLoadOfLValue(SrcLVal, Loc);
+ assert(RVal.isScalar() && "All flattened source values should be scalars");
+ llvm::Value *Val = RVal.getScalarVal();
+ llvm::Value *Cast = CGF.EmitScalarConversion(Val, SrcLVal.getType(),
+ DestLVal.getType(), Loc);
+ CGF.EmitStoreThroughLValue(RValue::get(Cast), DestLVal);
}
}
@@ -988,31 +950,33 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) {
Expr *Src = E->getSubExpr();
QualType SrcTy = Src->getType();
RValue RV = CGF.EmitAnyExpr(Src);
- QualType DestTy = E->getType();
- Address DestVal = Dest.getAddress();
+ LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
SourceLocation Loc = E->getExprLoc();
- assert(RV.isScalar() && "RHS of HLSL splat cast must be a scalar.");
+ assert(RV.isScalar() && SrcTy->isScalarType() &&
+ "RHS of HLSL splat cast must be a scalar.");
llvm::Value *SrcVal = RV.getScalarVal();
- EmitHLSLAggregateSplatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc);
break;
}
case CK_HLSLElementwiseCast: {
Expr *Src = E->getSubExpr();
QualType SrcTy = Src->getType();
RValue RV = CGF.EmitAnyExpr(Src);
- QualType DestTy = E->getType();
- Address DestVal = Dest.getAddress();
+ LValue DestLVal = CGF.MakeAddrLValue(Dest.getAddress(), E->getType());
SourceLocation Loc = E->getExprLoc();
if (RV.isScalar()) {
llvm::Value *SrcVal = RV.getScalarVal();
- EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ assert(SrcTy->isVectorType() &&
+ "HLSL Elementwise cast doesn't handle splatting.");
+ EmitHLSLScalarElementwiseAndSplatCasts(CGF, DestLVal, SrcVal, SrcTy, Loc);
} else {
assert(RV.isAggregate() &&
"Can't perform HLSL Aggregate cast on a complex type.");
Address SrcVal = RV.getAggregateAddress();
- EmitHLSLElementwiseCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc);
+ EmitHLSLElementwiseCast(CGF, DestLVal, CGF.MakeAddrLValue(SrcVal, SrcTy),
+ Loc);
}
break;
}
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index c961222..06d9d81 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2397,39 +2397,37 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) {
}
// RHS is an aggregate type
-static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address RHSVal,
- QualType RHSTy, QualType LHSTy,
- SourceLocation Loc) {
- SmallVector<std::pair<Address, llvm::Value *>, 16> LoadGEPList;
- SmallVector<QualType, 16> SrcTypes; // Flattened type
- CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes);
- // LHS is either a vector or a builtin?
+static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, LValue SrcVal,
+ QualType DestTy, SourceLocation Loc) {
+ SmallVector<LValue, 16> LoadList;
+ CGF.FlattenAccessAndTypeLValue(SrcVal, LoadList);
+ // Dest is either a vector or a builtin?
// if its a vector create a temp alloca to store into and return that
- if (auto *VecTy = LHSTy->getAs<VectorType>()) {
- assert(SrcTypes.size() >= VecTy->getNumElements() &&
- "Flattened type on RHS must have more elements than vector on LHS.");
+ if (auto *VecTy = DestTy->getAs<VectorType>()) {
+ assert(LoadList.size() >= VecTy->getNumElements() &&
+ "Flattened type on RHS must have the same number or more elements "
+ "than vector on LHS.");
llvm::Value *V =
- CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp"));
+ CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
// write to V.
for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) {
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load");
- llvm::Value *Idx = LoadGEPList[I].second;
- Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract")
- : Load;
- llvm::Value *Cast = CGF.EmitScalarConversion(
- Load, SrcTypes[I], VecTy->getElementType(), Loc);
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
+ assert(RVal.isScalar() &&
+ "All flattened source values should be scalars.");
+ llvm::Value *Cast =
+ CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[I].getType(),
+ VecTy->getElementType(), Loc);
V = CGF.Builder.CreateInsertElement(V, Cast, I);
}
return V;
}
- // i its a builtin just do an extract element or load.
- assert(LHSTy->isBuiltinType() &&
+ // If it's a builtin, just do an extract element or load.
+ assert(DestTy->isBuiltinType() &&
"Destination type must be a vector or builtin type.");
- llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load");
- llvm::Value *Idx = LoadGEPList[0].second;
- Load =
- Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load;
- return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc);
+ RValue RVal = CGF.EmitLoadOfLValue(LoadList[0], Loc);
+ assert(RVal.isScalar() && "All flattened source values should be scalars.");
+ return CGF.EmitScalarConversion(RVal.getScalarVal(), LoadList[0].getType(),
+ DestTy, Loc);
}
// VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts
@@ -2954,12 +2952,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
case CK_HLSLElementwiseCast: {
RValue RV = CGF.EmitAnyExpr(E);
SourceLocation Loc = CE->getExprLoc();
- QualType SrcTy = E->getType();
assert(RV.isAggregate() && "Not a valid HLSL Elementwise Cast.");
// RHS is an aggregate
- Address SrcVal = RV.getAggregateAddress();
- return EmitHLSLElementwiseCast(CGF, SrcVal, SrcTy, DestTy, Loc);
+ LValue SrcVal = CGF.MakeAddrLValue(RV.getAggregateAddress(), E->getType());
+ return EmitHLSLElementwiseCast(CGF, SrcVal, DestTy, Loc);
}
} // end of switch
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8cda583..fa94692 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6808,12 +6808,13 @@ public:
/// they were computed by collectAttachPtrExprInfo(), if they are semantically
/// different.
struct AttachPtrExprComparator {
- const MappableExprsHandler *Handler = nullptr;
+ const MappableExprsHandler &Handler;
// Cache of previous equality comparison results.
mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
CachedEqualityComparisons;
- AttachPtrExprComparator(const MappableExprsHandler *H) : Handler(H) {}
+ AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
+ AttachPtrExprComparator() = delete;
// Return true iff LHS is "less than" RHS.
bool operator()(const Expr *LHS, const Expr *RHS) const {
@@ -6821,15 +6822,15 @@ public:
return false;
// First, compare by complexity (depth)
- const auto ItLHS = Handler->AttachPtrComponentDepthMap.find(LHS);
- const auto ItRHS = Handler->AttachPtrComponentDepthMap.find(RHS);
+ const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
+ const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
std::optional<size_t> DepthLHS =
- (ItLHS != Handler->AttachPtrComponentDepthMap.end()) ? ItLHS->second
- : std::nullopt;
+ (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
+ : std::nullopt;
std::optional<size_t> DepthRHS =
- (ItRHS != Handler->AttachPtrComponentDepthMap.end()) ? ItRHS->second
- : std::nullopt;
+ (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
+ : std::nullopt;
// std::nullopt (no attach pointer) has lowest complexity
if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
@@ -6877,8 +6878,8 @@ public:
/// Returns true iff LHS was computed before RHS by
/// collectAttachPtrExprInfo().
bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
- const size_t &OrderLHS = Handler->AttachPtrComputationOrderMap.at(LHS);
- const size_t &OrderRHS = Handler->AttachPtrComputationOrderMap.at(RHS);
+ const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
+ const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
return OrderLHS < OrderRHS;
}
@@ -6897,7 +6898,7 @@ public:
if (!LHS || !RHS)
return false;
- ASTContext &Ctx = Handler->CGF.getContext();
+ ASTContext &Ctx = Handler.CGF.getContext();
// Strip away parentheses and no-op casts to get to the core expression
LHS = LHS->IgnoreParenNoopCasts(Ctx);
RHS = RHS->IgnoreParenNoopCasts(Ctx);
@@ -7246,6 +7247,10 @@ private:
llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
{nullptr, 0}};
+ /// An instance of the attach-ptr-expr comparator that can be used throughout
+ /// the lifetime of this handler.
+ AttachPtrExprComparator AttachPtrComparator;
+
llvm::Value *getExprTypeSize(const Expr *E) const {
QualType ExprTy = E->getType().getCanonicalType();
@@ -8963,7 +8968,7 @@ private:
public:
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
- : CurDir(&Dir), CGF(CGF) {
+ : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
// Extract firstprivate clause information.
for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
for (const auto *D : C->varlist())
@@ -9009,7 +9014,7 @@ public:
/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
- : CurDir(&Dir), CGF(CGF) {}
+ : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
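
The CGOpenMPRuntime change replaces the comparator's nullable handler pointer with a reference, deletes its default constructor, and has the handler own a comparator bound to `*this`. A hedged, self-contained sketch of that ownership pattern (illustrative names, not the patch itself):

    struct Handler;

    struct Comparator {
      const Handler &H;               // reference member: always bound, never null
      explicit Comparator(const Handler &H) : H(H) {}
      Comparator() = delete;          // no unbound comparator, mirroring the change above
      bool operator()(int LHS, int RHS) const;
    };

    struct Handler {
      int Bias = 0;
      Comparator Cmp;
      Handler() : Cmp(*this) {}       // the comparator lives exactly as long as its handler
    };

    bool Comparator::operator()(int LHS, int RHS) const {
      // Handler state is reachable without null checks.
      return LHS + H.Bias < RHS + H.Bias;
    }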
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index f0565c1..99de6e1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4464,10 +4464,8 @@ public:
AggValueSlot slot = AggValueSlot::ignored());
LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e);
- void FlattenAccessAndType(
- Address Addr, QualType AddrTy,
- SmallVectorImpl<std::pair<Address, llvm::Value *>> &AccessList,
- SmallVectorImpl<QualType> &FlatTypes);
+ void FlattenAccessAndTypeLValue(LValue LVal,
+ SmallVectorImpl<LValue> &AccessList);
llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface,
const ObjCIvarDecl *Ivar);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index f6f7f22..8d019d4 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -493,10 +493,15 @@ CodeGenModule::CodeGenModule(ASTContext &C,
auto ReaderOrErr = llvm::IndexedInstrProfReader::create(
CodeGenOpts.ProfileInstrumentUsePath, *FS,
CodeGenOpts.ProfileRemappingFile);
- // We're checking for profile read errors in CompilerInvocation, so if
- // there was an error it should've already been caught. If it hasn't been
- // somehow, trip an assertion.
- assert(ReaderOrErr);
+ if (auto E = ReaderOrErr.takeError()) {
+ unsigned DiagID = Diags.getCustomDiagID(
+ DiagnosticsEngine::Error, "Error in reading profile %0: %1");
+ llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
+ Diags.Report(DiagID)
+ << CodeGenOpts.ProfileInstrumentUsePath << EI.message();
+ });
+ return;
+ }
PGOReader = std::move(ReaderOrErr.get());
}
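
Instead of asserting that the profile reader was created, CodeGenModule now consumes the llvm::Error and reports it as a diagnostic. A hedged, standalone sketch of that Expected/takeError/handleAllErrors idiom (illustrative function and message only, not the committed code):

    #include "llvm/Support/Error.h"
    #include <string>

    std::string describeFailure(llvm::Expected<int> ValueOrErr) {
      std::string Msg = "ok";
      if (llvm::Error E = ValueOrErr.takeError()) {
        // handleAllErrors consumes the error; leaving it unhandled would abort
        // in assertion-enabled builds.
        llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
          Msg = EI.message();
        });
      }
      return Msg;
    }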
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 684cc09..107b9ff 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -41,6 +41,7 @@
#include "llvm/Frontend/Debug/Options.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
@@ -485,19 +486,47 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
}
if (ProfileUseArg) {
+ SmallString<128> UsePathBuf;
+ StringRef UsePath;
if (ProfileUseArg->getOption().matches(options::OPT_fprofile_instr_use_EQ))
- CmdArgs.push_back(Args.MakeArgString(
- Twine("-fprofile-instrument-use-path=") + ProfileUseArg->getValue()));
+ UsePath = ProfileUseArg->getValue();
else if ((ProfileUseArg->getOption().matches(
options::OPT_fprofile_use_EQ) ||
ProfileUseArg->getOption().matches(
options::OPT_fprofile_instr_use))) {
- SmallString<128> Path(
- ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
- if (Path.empty() || llvm::sys::fs::is_directory(Path))
- llvm::sys::path::append(Path, "default.profdata");
+ UsePathBuf =
+ ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue();
+ if (UsePathBuf.empty() || llvm::sys::fs::is_directory(UsePathBuf))
+ llvm::sys::path::append(UsePathBuf, "default.profdata");
+ UsePath = UsePathBuf;
+ }
+ auto ReaderOrErr =
+ llvm::IndexedInstrProfReader::create(UsePath, D.getVFS());
+ if (auto E = ReaderOrErr.takeError()) {
+ auto DiagID = D.getDiags().getCustomDiagID(
+ DiagnosticsEngine::Error, "Error in reading profile %0: %1");
+ llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
+ D.Diag(DiagID) << UsePath.str() << EI.message();
+ });
+ } else {
+ std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader =
+ std::move(ReaderOrErr.get());
+ StringRef UseKind;
+ // Currently memprof profiles are only added at the IR level. Mark the
+ // profile type as IR in that case as well; the subsequent matching
+ // needs to detect which is available (might be one or both).
+ if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) {
+ if (PGOReader->hasCSIRLevelProfile())
+ UseKind = "csllvm";
+ else
+ UseKind = "llvm";
+ } else
+ UseKind = "clang";
+
+ CmdArgs.push_back(
+ Args.MakeArgString("-fprofile-instrument-use=" + UseKind));
CmdArgs.push_back(
- Args.MakeArgString(Twine("-fprofile-instrument-use-path=") + Path));
+ Args.MakeArgString("-fprofile-instrument-use-path=" + UsePath));
}
}
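
The driver hunk keeps the existing path defaulting (an empty or directory-valued -fprofile-use resolves to default.profdata inside it) and now also sniffs the profile to pick the -fprofile-instrument-use kind itself. A hedged sketch of just the path-defaulting step, using the same LLVM APIs as the hunk (illustrative helper name):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/Path.h"

    static llvm::SmallString<128> resolveProfilePath(llvm::StringRef Arg) {
      llvm::SmallString<128> Path(Arg);
      // An empty value or a directory falls back to <dir>/default.profdata.
      if (Path.empty() || llvm::sys::fs::is_directory(Path))
        llvm::sys::path::append(Path, "default.profdata");
      return Path;
    }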
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 4223752..50fd50a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -533,9 +533,9 @@ static T extractMaskValue(T KeyPath) {
#define PARSE_OPTION_WITH_MARSHALLING( \
ARGS, DIAGS, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, \
ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \
- METAVAR, VALUES, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \
- IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \
- TABLE_INDEX) \
+ METAVAR, VALUES, SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \
+ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \
+ MERGER, EXTRACTOR, TABLE_INDEX) \
if ((VISIBILITY) & options::CC1Option) { \
KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \
if (IMPLIED_CHECK) \
@@ -551,8 +551,9 @@ static T extractMaskValue(T KeyPath) {
#define GENERATE_OPTION_WITH_MARSHALLING( \
CONSUMER, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \
FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \
- SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \
- IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \
+ SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \
+ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \
+ TABLE_INDEX) \
if ((VISIBILITY) & options::CC1Option) { \
[&](const auto &Extracted) { \
if (ALWAYS_EMIT || \
@@ -1473,34 +1474,6 @@ static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) {
return Buffer;
}
-// Set the profile kind using fprofile-instrument-use-path.
-static void setPGOUseInstrumentor(CodeGenOptions &Opts,
- const Twine &ProfileName,
- llvm::vfs::FileSystem &FS,
- DiagnosticsEngine &Diags) {
- auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName, FS);
- if (auto E = ReaderOrErr.takeError()) {
- unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error,
- "Error in reading profile %0: %1");
- llvm::handleAllErrors(std::move(E), [&](const llvm::ErrorInfoBase &EI) {
- Diags.Report(DiagID) << ProfileName.str() << EI.message();
- });
- return;
- }
- std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader =
- std::move(ReaderOrErr.get());
- // Currently memprof profiles are only added at the IR level. Mark the profile
- // type as IR in that case as well and the subsequent matching needs to detect
- // which is available (might be one or both).
- if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) {
- if (PGOReader->hasCSIRLevelProfile())
- Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileCSIRInstr);
- else
- Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileIRInstr);
- } else
- Opts.setProfileUse(llvm::driver::ProfileInstrKind::ProfileClangInstr);
-}
-
void CompilerInvocation::setDefaultPointerAuthOptions(
PointerAuthOptions &Opts, const LangOptions &LangOpts,
const llvm::Triple &Triple) {
@@ -5090,16 +5063,10 @@ bool CompilerInvocation::CreateFromArgsImpl(
append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs);
}
- // Set PGOOptions. Need to create a temporary VFS to read the profile
- // to determine the PGO type.
- if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty()) {
- auto FS =
- createVFSFromOverlayFiles(Res.getHeaderSearchOpts().VFSOverlayFiles,
- Diags, llvm::vfs::getRealFileSystem());
- setPGOUseInstrumentor(Res.getCodeGenOpts(),
- Res.getCodeGenOpts().ProfileInstrumentUsePath, *FS,
- Diags);
- }
+ if (!Res.getCodeGenOpts().ProfileInstrumentUsePath.empty() &&
+ Res.getCodeGenOpts().getProfileUse() ==
+ llvm::driver::ProfileInstrKind::ProfileNone)
+ Diags.Report(diag::err_drv_profile_instrument_use_path_with_no_kind);
FixupInvocation(Res, Diags, Args, DashX);
diff --git a/clang/lib/Headers/avx512fp16intrin.h b/clang/lib/Headers/avx512fp16intrin.h
index 4bd7981..d951ba0 100644
--- a/clang/lib/Headers/avx512fp16intrin.h
+++ b/clang/lib/Headers/avx512fp16intrin.h
@@ -41,7 +41,8 @@ typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
-static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
+static __inline__ _Float16 __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_cvtsh_h(__m512h __a) {
return __a[0];
}
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 754f43a..965741f 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7330,9 +7330,8 @@ _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
__builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi32_epi8 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi32_epi8(__m128i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
@@ -7360,9 +7359,8 @@ _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
__builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi32_epi8 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi32_epi8(__m256i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v8si)__A, __v8qi),
(__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
@@ -7370,8 +7368,7 @@ _mm256_cvtepi32_epi8 (__m256i __A)
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
-{
+_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) {
return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
(__v16qi) __O, __M);
}
@@ -7390,9 +7387,8 @@ _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
__builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi32_epi16 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi32_epi16(__m128i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
2, 3, 4, 5, 6, 7);
@@ -7419,9 +7415,8 @@ _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
__builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi32_epi16 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi32_epi16(__m256i __A) {
return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
}
@@ -7446,9 +7441,8 @@ _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
__builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi64_epi8 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi64_epi8(__m128i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
@@ -7475,9 +7469,8 @@ _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
__builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_epi8 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_epi8(__m256i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
@@ -7504,9 +7497,8 @@ _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
__builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi64_epi32 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi64_epi32(__m128i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
}
@@ -7532,23 +7524,20 @@ _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
__builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_epi32 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_epi32(__m256i __A) {
return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
(__v4si)_mm256_cvtepi64_epi32(__A),
(__v4si)__O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
(__v4si)_mm256_cvtepi64_epi32(__A),
(__v4si)_mm_setzero_si128());
@@ -7560,9 +7549,8 @@ _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
__builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_cvtepi64_epi16 (__m128i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_cvtepi64_epi16(__m128i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
3, 3, 3, 3);
@@ -7590,9 +7578,8 @@ _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
__builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS256
-_mm256_cvtepi64_epi16 (__m256i __A)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_cvtepi64_epi16(__m256i __A) {
return (__m128i)__builtin_shufflevector(
__builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
2, 3, 4, 5, 6, 7);
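
A hedged usage sketch for the intrinsic changes above: once these conversions carry the *_CONSTEXPR attribute macros, they can participate in constant evaluation. This assumes a translation unit built with the required AVX-512VL target features and a language mode in which __DEFAULT_FN_ATTRS256_CONSTEXPR actually expands to constexpr; it is an illustration, not a test from the patch.

    #include <immintrin.h>

    // Four 64-bit lanes narrowed to four 32-bit lanes at compile time.
    constexpr __m256i Wide = {1, 2, 3, 4};
    constexpr __m128i Narrow = _mm256_cvtepi64_epi32(Wide);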
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 11d2d5c..999e302c 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -1049,6 +1049,7 @@ ExprResult ConstraintSatisfactionChecker::Evaluate(
case NormalizedConstraint::ConstraintKind::Compound:
return Evaluate(static_cast<const CompoundConstraint &>(Constraint), MLTAL);
}
+ llvm_unreachable("Unknown ConstraintKind enum");
}
static bool CheckConstraintSatisfaction(
@@ -2141,6 +2142,7 @@ bool SubstituteParameterMappings::substitute(NormalizedConstraint &N) {
return substitute(Compound.getRHS());
}
}
+ llvm_unreachable("Unknown ConstraintKind enum");
}
} // namespace
@@ -2561,7 +2563,6 @@ FormulaType SubsumptionChecker::Normalize(const NormalizedConstraint &NC) {
};
switch (NC.getKind()) {
-
case NormalizedConstraint::ConstraintKind::Atomic:
return {{find(&static_cast<const AtomicConstraint &>(NC))}};
@@ -2601,6 +2602,7 @@ FormulaType SubsumptionChecker::Normalize(const NormalizedConstraint &NC) {
return Res;
}
}
+ llvm_unreachable("Unknown ConstraintKind enum");
}
void SubsumptionChecker::AddUniqueClauseToFormula(Formula &F, Clause C) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 0069b08..6eaf7b9 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -11041,17 +11041,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
<< CUDA().getConfigureFuncName();
Context.setcudaConfigureCallDecl(NewFD);
}
-
- // Variadic functions, other than a *declaration* of printf, are not allowed
- // in device-side CUDA code, unless someone passed
- // -fcuda-allow-variadic-functions.
- if (!getLangOpts().CUDAAllowVariadicFunctions && NewFD->isVariadic() &&
- (NewFD->hasAttr<CUDADeviceAttr>() ||
- NewFD->hasAttr<CUDAGlobalAttr>()) &&
- !(II && II->isStr("printf") && NewFD->isExternC() &&
- !D.isFunctionDefinition())) {
- Diag(NewFD->getLocation(), diag::err_variadic_device_fn);
- }
}
MarkUnusedFileScopedDecl(NewFD);
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index fa30c66b..2b375b9 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3571,9 +3571,6 @@ bool SemaHLSL::CanPerformAggregateSplatCast(Expr *Src, QualType DestTy) {
if (SrcVecTy)
SrcTy = SrcVecTy->getElementType();
- if (ContainsBitField(DestTy))
- return false;
-
llvm::SmallVector<QualType> DestTypes;
BuildFlattenedTypeList(DestTy, DestTypes);
@@ -3600,9 +3597,6 @@ bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) {
(DestTy->isScalarType() || DestTy->isVectorType()))
return false;
- if (ContainsBitField(DestTy) || ContainsBitField(SrcTy))
- return false;
-
llvm::SmallVector<QualType> DestTypes;
BuildFlattenedTypeList(DestTy, DestTypes);
llvm::SmallVector<QualType> SrcTypes;
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 922fcac..543db46 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -3920,6 +3920,7 @@ bool InitializationSequence::isAmbiguous() const {
case FK_AddressOfUnaddressableFunction:
case FK_ParenthesizedListInitFailed:
case FK_DesignatedInitForNonAggregate:
+ case FK_HLSLInitListFlatteningFailed:
return false;
case FK_ReferenceInitOverloadFailed:
@@ -4882,8 +4883,10 @@ static void TryListInitialization(Sema &S,
bool TreatUnavailableAsInvalid) {
QualType DestType = Entity.getType();
- if (S.getLangOpts().HLSL && !S.HLSL().transformInitList(Entity, InitList))
+ if (S.getLangOpts().HLSL && !S.HLSL().transformInitList(Entity, InitList)) {
+ Sequence.SetFailed(InitializationSequence::FK_HLSLInitListFlatteningFailed);
return;
+ }
// C++ doesn't allow scalar initialization with more than one argument.
// But C99 complex numbers are scalars and it makes sense there.
@@ -6817,33 +6820,18 @@ void InitializationSequence::InitializeFrom(Sema &S,
assert(Args.size() >= 1 && "Zero-argument case handled above");
// For HLSL ext vector types we allow list initialization behavior for C++
- // constructor syntax. This is accomplished by converting initialization
- // arguments an InitListExpr late.
+ // functional cast expressions that look like constructor syntax. This is
+ // accomplished by converting the initialization arguments to an InitListExpr.
if (S.getLangOpts().HLSL && Args.size() > 1 && DestType->isExtVectorType() &&
(SourceType.isNull() ||
!Context.hasSameUnqualifiedType(SourceType, DestType))) {
-
- llvm::SmallVector<Expr *> InitArgs;
- for (auto *Arg : Args) {
- if (Arg->getType()->isExtVectorType()) {
- const auto *VTy = Arg->getType()->castAs<ExtVectorType>();
- unsigned Elm = VTy->getNumElements();
- for (unsigned Idx = 0; Idx < Elm; ++Idx) {
- InitArgs.emplace_back(new (Context) ArraySubscriptExpr(
- Arg,
- IntegerLiteral::Create(
- Context, llvm::APInt(Context.getIntWidth(Context.IntTy), Idx),
- Context.IntTy, SourceLocation()),
- VTy->getElementType(), Arg->getValueKind(), Arg->getObjectKind(),
- SourceLocation()));
- }
- } else
- InitArgs.emplace_back(Arg);
- }
- InitListExpr *ILE = new (Context) InitListExpr(
- S.getASTContext(), SourceLocation(), InitArgs, SourceLocation());
+ InitListExpr *ILE = new (Context)
+ InitListExpr(S.getASTContext(), Args.front()->getBeginLoc(), Args,
+ Args.back()->getEndLoc());
+ ILE->setType(DestType);
Args[0] = ILE;
- AddListInitializationStep(DestType);
+ TryListInitialization(S, Entity, Kind, ILE, *this,
+ TreatUnavailableAsInvalid);
return;
}
@@ -9301,6 +9289,14 @@ bool InitializationSequence::Diagnose(Sema &S,
break;
}
+ case InitializationSequence::FK_HLSLInitListFlatteningFailed: {
+ // Unlike C/C++ list initialization, there is no fallback if it fails. This
+ // allows us to diagnose the failure when it happens in the
+ // TryListInitialization call instead of delaying the diagnosis, which is
+ // beneficial because the flattening is also expensive.
+ break;
+ }
+
case FK_ExplicitConstructor: {
S.Diag(Kind.getLocation(), diag::err_selected_explicit_constructor)
<< Args[0]->getSourceRange();
@@ -9499,6 +9495,10 @@ void InitializationSequence::dump(raw_ostream &OS) const {
case FK_DesignatedInitForNonAggregate:
OS << "designated initializer for non-aggregate type";
break;
+
+ case FK_HLSLInitListFlatteningFailed:
+ OS << "HLSL initialization list flattening failed";
+ break;
}
OS << '\n';
return;
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 7ad7049..8471f02 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -2724,16 +2724,6 @@ Expr *GenerateReductionInitRecipeExpr(ASTContext &Context,
return InitExpr;
}
-const Expr *StripOffBounds(const Expr *VarExpr) {
- while (isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(VarExpr)) {
- if (const auto *AS = dyn_cast<ArraySectionExpr>(VarExpr))
- VarExpr = AS->getBase()->IgnoreParenImpCasts();
- else if (const auto *Sub = dyn_cast<ArraySubscriptExpr>(VarExpr))
- VarExpr = Sub->getBase()->IgnoreParenImpCasts();
- }
- return VarExpr;
-}
-
VarDecl *CreateAllocaDecl(ASTContext &Ctx, DeclContext *DC,
SourceLocation BeginLoc, IdentifierInfo *VarName,
QualType VarTy) {
@@ -2794,17 +2784,18 @@ OpenACCPrivateRecipe SemaOpenACC::CreatePrivateInitRecipe(const Expr *VarExpr) {
OpenACCFirstPrivateRecipe
SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) {
- // TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe
- VarExpr = StripOffBounds(VarExpr);
-
+ // We don't strip bounds here, so that the recipe init happens at the
+ // 'lowest' possible level. Codegen is going to have to do its own 'looping'.
if (!VarExpr || VarExpr->getType()->isDependentType())
return OpenACCFirstPrivateRecipe::Empty();
QualType VarTy =
VarExpr->getType().getNonReferenceType().getUnqualifiedType();
- // TODO: OpenACC: for arrays/bounds versions, we're going to have to do a
- // different initializer, but for now we can go ahead with this.
+ // Array sections are special, and we have to treat them that way.
+ if (const auto *ASE =
+ dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts()))
+ VarTy = ArraySectionExpr::getBaseOriginalType(ASE);
VarDecl *AllocaDecl = CreateAllocaDecl(
getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(),
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index b870114..5657dfe 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -4413,14 +4413,23 @@ CompareImplicitConversionSequences(Sema &S, SourceLocation Loc,
Result = CompareStandardConversionSequences(S, Loc,
ICS1.Standard, ICS2.Standard);
else if (ICS1.isUserDefined()) {
+ // With lazy template loading, it is possible to find non-canonical
+ // FunctionDecls, depending on when redecl chains are completed. Make sure
+ // to compare the canonical decls of conversion functions. This avoids
+ // ambiguity problems for templated conversion operators.
+ const FunctionDecl *ConvFunc1 = ICS1.UserDefined.ConversionFunction;
+ if (ConvFunc1)
+ ConvFunc1 = ConvFunc1->getCanonicalDecl();
+ const FunctionDecl *ConvFunc2 = ICS2.UserDefined.ConversionFunction;
+ if (ConvFunc2)
+ ConvFunc2 = ConvFunc2->getCanonicalDecl();
// User-defined conversion sequence U1 is a better conversion
// sequence than another user-defined conversion sequence U2 if
// they contain the same user-defined conversion function or
// constructor and if the second standard conversion sequence of
// U1 is better than the second standard conversion sequence of
// U2 (C++ 13.3.3.2p3).
- if (ICS1.UserDefined.ConversionFunction ==
- ICS2.UserDefined.ConversionFunction)
+ if (ConvFunc1 == ConvFunc2)
Result = CompareStandardConversionSequences(S, Loc,
ICS1.UserDefined.After,
ICS2.UserDefined.After);
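
A hedged restatement of the canonicalization idiom added above as a standalone helper (illustrative name, not part of the patch): two redeclarations of the same conversion function compare unequal as pointers, but their canonical declarations compare equal.

    #include "clang/AST/Decl.h"

    static bool isSameConversionFunction(const clang::FunctionDecl *A,
                                         const clang::FunctionDecl *B) {
      // Map each declaration to its canonical redeclaration before comparing.
      if (A)
        A = A->getCanonicalDecl();
      if (B)
        B = B->getCanonicalDecl();
      return A == B;
    }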
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
index e1f4d0d..b0096d8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScannerImpl.cpp
@@ -509,6 +509,8 @@ bool initializeScanCompilerInstance(
ScanInstance.getFrontendOpts().DisableFree = false;
ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
+ ScanInstance.getFrontendOpts().GenReducedBMI = false;
+ ScanInstance.getFrontendOpts().ModuleOutputPath.clear();
// This will prevent us compiling individual modules asynchronously since
// FileManager is not thread-safe, but it does improve performance for now.
ScanInstance.getFrontendOpts().ModulesShareFileManager = true;
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index d67178c..a117bec 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -263,6 +263,10 @@ makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
// units.
CI.getFrontendOpts().Inputs.clear();
CI.getFrontendOpts().OutputFile.clear();
+ CI.getFrontendOpts().GenReducedBMI = false;
+ CI.getFrontendOpts().ModuleOutputPath.clear();
+ CI.getHeaderSearchOpts().ModulesSkipHeaderSearchPaths = false;
+ CI.getHeaderSearchOpts().ModulesSkipDiagnosticOptions = false;
// LLVM options are not going to affect the AST
CI.getFrontendOpts().LLVMArgs.clear();