diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/AST/ByteCode/Interp.cpp | 43 | ||||
-rw-r--r-- | clang/lib/AST/ByteCode/InterpBuiltin.cpp | 29 | ||||
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp | 60 | ||||
-rw-r--r-- | clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h | 109 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 63 | ||||
-rw-r--r-- | clang/lib/Sema/SemaConcept.cpp | 4 | ||||
-rw-r--r-- | clang/lib/Sema/SemaDecl.cpp | 11 | ||||
-rw-r--r-- | clang/lib/Sema/SemaOpenACC.cpp | 21 | ||||
-rw-r--r-- | clang/lib/Sema/SemaOverload.cpp | 13 |
9 files changed, 143 insertions, 210 deletions
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 21af3d6..8904396 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1638,6 +1638,36 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, return true; } +static bool GetDynamicDecl(InterpState &S, CodePtr OpPC, Pointer TypePtr, + const CXXRecordDecl *&DynamicDecl) { + while (TypePtr.isBaseClass()) + TypePtr = TypePtr.getBase(); + + QualType DynamicType = TypePtr.getType(); + if (TypePtr.isStatic() || TypePtr.isConst()) { + const VarDecl *VD = TypePtr.getDeclDesc()->asVarDecl(); + if (!VD->isConstexpr()) { + const Expr *E = S.Current->getExpr(OpPC); + APValue V = TypePtr.toAPValue(S.getASTContext()); + QualType TT = S.getASTContext().getLValueReferenceType(DynamicType); + S.FFDiag(E, diag::note_constexpr_polymorphic_unknown_dynamic_type) + << AccessKinds::AK_MemberCall << V.getAsString(S.getASTContext(), TT); + return false; + } + } + + if (DynamicType->isPointerType() || DynamicType->isReferenceType()) { + DynamicDecl = DynamicType->getPointeeCXXRecordDecl(); + } else if (DynamicType->isArrayType()) { + const Type *ElemType = DynamicType->getPointeeOrArrayElementType(); + assert(ElemType); + DynamicDecl = ElemType->getAsCXXRecordDecl(); + } else { + DynamicDecl = DynamicType->getAsCXXRecordDecl(); + } + return true; +} + bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, uint32_t VarArgSize) { assert(Func->hasThisPointer()); @@ -1662,17 +1692,8 @@ bool CallVirt(InterpState &S, CodePtr OpPC, const Function *Func, } const CXXRecordDecl *DynamicDecl = nullptr; - { - Pointer TypePtr = ThisPtr; - while (TypePtr.isBaseClass()) - TypePtr = TypePtr.getBase(); - - QualType DynamicType = TypePtr.getType(); - if (DynamicType->isPointerType() || DynamicType->isReferenceType()) - DynamicDecl = DynamicType->getPointeeCXXRecordDecl(); - else - DynamicDecl = DynamicType->getAsCXXRecordDecl(); - } + if (!GetDynamicDecl(S, OpPC, ThisPtr, DynamicDecl)) + return false; assert(DynamicDecl); const auto *StaticDecl = cast<CXXRecordDecl>(Func->getParentDecl()); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 68ebfdf..a3c4ba5 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -736,25 +736,6 @@ static bool interp__builtin_expect(InterpState &S, CodePtr OpPC, return true; } -/// rotateleft(value, amount) -static bool interp__builtin_rotate(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call, bool Right) { - APSInt Amount = popToAPSInt(S, Call->getArg(1)); - APSInt Value = popToAPSInt(S, Call->getArg(0)); - - APSInt Result; - if (Right) - Result = APSInt(Value.rotr(Amount.urem(Value.getBitWidth())), - /*IsUnsigned=*/true); - else // Left. - Result = APSInt(Value.rotl(Amount.urem(Value.getBitWidth())), - /*IsUnsigned=*/true); - - pushInteger(S, Result, Call->getType()); - return true; -} - static bool interp__builtin_ffs(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const CallExpr *Call) { @@ -3160,7 +3141,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI_rotl: case Builtin::BI_lrotl: case Builtin::BI_rotl64: - return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/false); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt { + return Value.rotl(Amount); + }); case Builtin::BI__builtin_rotateright8: case Builtin::BI__builtin_rotateright16: @@ -3171,7 +3155,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case Builtin::BI_rotr: case Builtin::BI_lrotr: case Builtin::BI_rotr64: - return interp__builtin_rotate(S, OpPC, Frame, Call, /*Right=*/true); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Value, const APSInt &Amount) -> APInt { + return Value.rotr(Amount); + }); case Builtin::BI__builtin_ffs: case Builtin::BI__builtin_ffsl: diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index bbc45e5..24a5fc2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -221,10 +221,9 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca( return initialAlloca; } -mlir::Value -OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, - mlir::Value bound, - mlir::Location loc, bool inverse) { +std::pair<mlir::Value, mlir::Value> OpenACCRecipeBuilderBase::createBoundsLoop( + mlir::Value subscriptedValue, mlir::Value subscriptedValue2, + mlir::Value bound, mlir::Location loc, bool inverse) { mlir::Operation *bodyInsertLoc; mlir::Type itrTy = cgf.cgm.convertType(cgf.getContext().UnsignedLongLongTy); @@ -249,7 +248,6 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, return cir::PtrStrideOp::create(builder, loc, eltLoad.getType(), eltLoad, idxLoad); - }; auto forStmtBuilder = [&]() { @@ -303,6 +301,8 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, if (subscriptedValue) subscriptedValue = doSubscriptOp(subscriptedValue, load); + if (subscriptedValue2) + subscriptedValue2 = doSubscriptOp(subscriptedValue2, load); bodyInsertLoc = builder.createYield(loc); }, /*stepBuilder=*/ @@ -325,7 +325,7 @@ OpenACCRecipeBuilderBase::createBoundsLoop(mlir::Value subscriptedValue, // Leave the insertion point to be inside the body, so we can loop over // these things. builder.setInsertionPoint(bodyInsertLoc); - return subscriptedValue; + return {subscriptedValue, subscriptedValue2}; } mlir::acc::ReductionOperator @@ -434,7 +434,7 @@ void OpenACCRecipeBuilderBase::createInitRecipe( mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl, - QualType origType) { + QualType origType, bool emitInitExpr) { assert(allocaDecl && "Required recipe variable not set?"); CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl}; @@ -464,14 +464,15 @@ void OpenACCRecipeBuilderBase::createInitRecipe( // initialize this variable correctly. CIRGenFunction::AutoVarEmission tempDeclEmission = cgf.emitAutoVarAlloca(*allocaDecl, builder.saveInsertionPoint()); - cgf.emitAutoVarInit(tempDeclEmission); + if (emitInitExpr) + cgf.emitAutoVarInit(tempDeclEmission); } else { mlir::Value alloca = makeBoundsAlloca( block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes); // If the initializer is trivial, there is nothing to do here, so save // ourselves some effort. - if (allocaDecl->getInit() && + if (emitInitExpr && allocaDecl->getInit() && (!cgf.isTrivialInitializer(allocaDecl->getInit()) || cgf.getContext().getLangOpts().getTrivialAutoVarInit() != LangOptions::TrivialAutoVarInitKind::Uninitialized)) @@ -484,35 +485,42 @@ void OpenACCRecipeBuilderBase::createInitRecipe( void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy( mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - CIRGenFunction::AutoVarEmission tempDeclEmission, - mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe, - const VarDecl *temporary) { - mlir::Block *block = - createRecipeBlock(recipe.getCopyRegion(), mainOp.getType(), loc, - /*numBounds=*/0, /*isInit=*/false); - builder.setInsertionPointToEnd(&recipe.getCopyRegion().back()); + const VarDecl *allocaDecl, const VarDecl *temporary, + mlir::Region ©Region, size_t numBounds) { + mlir::Block *block = createRecipeBlock(copyRegion, mainOp.getType(), loc, + numBounds, /*isInit=*/false); + builder.setInsertionPointToEnd(©Region.back()); CIRGenFunction::LexicalScope ls(cgf, loc, block); - mlir::BlockArgument fromArg = block->getArgument(0); - mlir::BlockArgument toArg = block->getArgument(1); + mlir::Value fromArg = block->getArgument(0); + mlir::Value toArg = block->getArgument(1); - mlir::Type elementTy = - mlir::cast<cir::PointerType>(mainOp.getType()).getPointee(); + llvm::MutableArrayRef<mlir::BlockArgument> boundsRange = + block->getArguments().drop_front(2); - // Set the address of the emission to be the argument, so that we initialize - // that instead of the variable in the other block. - tempDeclEmission.setAllocatedAddress( - Address{toArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); + for (mlir::BlockArgument boundArg : llvm::reverse(boundsRange)) + std::tie(fromArg, toArg) = + createBoundsLoop(fromArg, toArg, boundArg, loc, /*inverse=*/false); + + // Set up the 'to' address. + mlir::Type elementTy = + mlir::cast<cir::PointerType>(toArg.getType()).getPointee(); + CIRGenFunction::AutoVarEmission tempDeclEmission(*allocaDecl); tempDeclEmission.emittedAsOffload = true; + tempDeclEmission.setAllocatedAddress( + Address{toArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); + // Set up the 'from' address from the temporary. CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, temporary}; cgf.setAddrOfLocalVar( temporary, - Address{fromArg, elementTy, cgf.getContext().getDeclAlign(varRecipe)}); - + Address{fromArg, elementTy, cgf.getContext().getDeclAlign(allocaDecl)}); cgf.emitAutoVarInit(tempDeclEmission); + + builder.setInsertionPointToEnd(©Region.back()); mlir::acc::YieldOp::create(builder, locEnd); } + // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the // insertion point to be inside of the 'combiner' section of the recipe, but diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h index 21707ad..a5da744 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h @@ -49,14 +49,16 @@ protected: // Creates a loop through an 'acc.bounds', leaving the 'insertion' point to be // the inside of the loop body. Traverses LB->UB UNLESS `inverse` is set. // Returns the 'subscriptedValue' changed with the new bounds subscript. + std::pair<mlir::Value, mlir::Value> + createBoundsLoop(mlir::Value subscriptedValue, mlir::Value subscriptedValue2, + mlir::Value bound, mlir::Location loc, bool inverse); + mlir::Value createBoundsLoop(mlir::Value subscriptedValue, mlir::Value bound, - mlir::Location loc, bool inverse); + mlir::Location loc, bool inverse) { + return createBoundsLoop(subscriptedValue, {}, bound, loc, inverse).first; + } + mlir::acc::ReductionOperator convertReductionOp(OpenACCReductionOperator op); - void createFirstprivateRecipeCopy( - mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, - CIRGenFunction::AutoVarEmission tempDeclEmission, - mlir::acc::FirstprivateRecipeOp recipe, const VarDecl *varRecipe, - const VarDecl *temporary); // This function generates the 'combiner' section for a reduction recipe. Note // that this function is not 'insertion point' clean, in that it alters the @@ -66,11 +68,19 @@ protected: mlir::Value mainOp, mlir::acc::ReductionRecipeOp recipe, size_t numBounds); + void createInitRecipe(mlir::Location loc, mlir::Location locEnd, SourceRange exprRange, mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds, llvm::ArrayRef<QualType> boundTypes, - const VarDecl *allocaDecl, QualType origType); + const VarDecl *allocaDecl, QualType origType, + bool emitInitExpr); + + void createFirstprivateRecipeCopy(mlir::Location loc, mlir::Location locEnd, + mlir::Value mainOp, + const VarDecl *allocaDecl, + const VarDecl *temporary, + mlir::Region ©Region, size_t numBounds); void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp, CharUnits alignment, @@ -150,63 +160,6 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase { return recipeName; } - // Create the 'init' section of the recipe, including the 'copy' section for - // 'firstprivate'. Note that this function is not 'insertion point' clean, in - // that it alters the insertion point to be inside of the 'destroy' section of - // the recipe, but doesn't restore it aftewards. - void createRecipeInitCopy(mlir::Location loc, mlir::Location locEnd, - SourceRange exprRange, mlir::Value mainOp, - RecipeTy recipe, const VarDecl *varRecipe, - const VarDecl *temporary) { - // TODO: OpenACC: when we get the 'pointer' variants for - // firstprivate/reduction, this probably should be removed/split into - // functions for the BuilderBase. - assert(varRecipe && "Required recipe variable not set?"); - - CIRGenFunction::AutoVarEmission tempDeclEmission{ - CIRGenFunction::AutoVarEmission::invalid()}; - CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, varRecipe}; - - // Do the 'init' section of the recipe IR, which does an alloca, then the - // initialization (except for firstprivate). - mlir::Block *block = - createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc, - /*numBounds=*/0, /*isInit=*/true); - builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); - CIRGenFunction::LexicalScope ls(cgf, loc, block); - - tempDeclEmission = - cgf.emitAutoVarAlloca(*varRecipe, builder.saveInsertionPoint()); - - // 'firstprivate' doesn't do its initialization in the 'init' section, - // instead it does it in the 'copy' section. SO, only do 'init' here for - // reduction. - if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) { - // Unlike Private, the recipe here is always required as it has to do - // init, not just 'default' init. - if (!varRecipe->getInit()) - cgf.cgm.errorNYI(exprRange, "reduction init recipe"); - cgf.emitAutoVarInit(tempDeclEmission); - } - - mlir::acc::YieldOp::create(builder, locEnd); - - if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) { - if (!varRecipe->getInit()) { - // If we don't have any initialization recipe, we failed during Sema to - // initialize this correctly. If we disable the - // Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll - // emit an error to tell us. However, emitting those errors during - // production is a violation of the standard, so we cannot do them. - cgf.cgm.errorNYI( - exprRange, "firstprivate copy-init recipe not properly generated"); - } - - createFirstprivateRecipeCopy(loc, locEnd, mainOp, tempDeclEmission, - recipe, varRecipe, temporary); - } - } - public: OpenACCRecipeBuilder(CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder) @@ -221,19 +174,6 @@ public: BuiltinType::ArraySection) && "array section shouldn't make it to recipe creation"); - // TODO: OpenACC: This is a bit of a hackery to get this to not change for - // the non-private recipes. This will be removed soon, when we get this - // 'right' for firstprivate and reduction. - if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) { - if (numBounds) { - cgf.cgm.errorNYI(varRef->getSourceRange(), - "firstprivate-init with bounds"); - } - boundTypes = {}; - numBounds = 0; - origType = baseType; - } - mlir::ModuleOp mod = builder.getBlock() ->getParent() ->template getParentOfType<mlir::ModuleOp>(); @@ -262,21 +202,20 @@ public: if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) { createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, - origType); + origType, /*emitInitExpr=*/true); } else if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) { createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, recipe.getInitRegion(), numBounds, boundTypes, varRecipe, - origType); + origType, /*emitInitExpr=*/true); createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds); } else { static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>); - // TODO: OpenACC: we probably want this to call createInitRecipe as well, - // but do so in a way that omits the 'initialization', so that we can do - // it separately, since it belongs in the 'copy' region. It also might - // need a way of getting the tempDeclEmission out of it for that purpose. - createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp, - recipe, varRecipe, temporary); + createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp, + recipe.getInitRegion(), numBounds, boundTypes, varRecipe, + origType, /*emitInitExpr=*/false); + createFirstprivateRecipeCopy(loc, locEnd, mainOp, varRecipe, temporary, + recipe.getCopyRegion(), numBounds); } if (origType.isDestructedType()) diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 754f43a..965741f 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -7330,9 +7330,8 @@ _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi32_epi8 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi32_epi8(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); @@ -7360,9 +7359,8 @@ _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi8 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi8(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v8si)__A, __v8qi), (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, @@ -7370,8 +7368,7 @@ _mm256_cvtepi32_epi8 (__m256i __A) } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) -{ +_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } @@ -7390,9 +7387,8 @@ _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi32_epi16 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi32_epi16(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); @@ -7419,9 +7415,8 @@ _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi32_epi16 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi32_epi16(__m256i __A) { return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); } @@ -7446,9 +7441,8 @@ _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi8 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi8(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); @@ -7475,9 +7469,8 @@ _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi8 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi8(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); @@ -7504,9 +7497,8 @@ _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi32 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi32(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); } @@ -7532,23 +7524,20 @@ _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi32 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi32(__m256i __A) { return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm256_cvtepi64_epi32(__A), (__v4si)_mm_setzero_si128()); @@ -7560,9 +7549,8 @@ _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtepi64_epi16 (__m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_cvtepi64_epi16(__m128i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 3, 3, 3, 3); @@ -7590,9 +7578,8 @@ _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtepi64_epi16 (__m256i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_cvtepi64_epi16(__m256i __A) { return (__m128i)__builtin_shufflevector( __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7); diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index 11d2d5c..999e302c 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -1049,6 +1049,7 @@ ExprResult ConstraintSatisfactionChecker::Evaluate( case NormalizedConstraint::ConstraintKind::Compound: return Evaluate(static_cast<const CompoundConstraint &>(Constraint), MLTAL); } + llvm_unreachable("Unknown ConstraintKind enum"); } static bool CheckConstraintSatisfaction( @@ -2141,6 +2142,7 @@ bool SubstituteParameterMappings::substitute(NormalizedConstraint &N) { return substitute(Compound.getRHS()); } } + llvm_unreachable("Unknown ConstraintKind enum"); } } // namespace @@ -2561,7 +2563,6 @@ FormulaType SubsumptionChecker::Normalize(const NormalizedConstraint &NC) { }; switch (NC.getKind()) { - case NormalizedConstraint::ConstraintKind::Atomic: return {{find(&static_cast<const AtomicConstraint &>(NC))}}; @@ -2601,6 +2602,7 @@ FormulaType SubsumptionChecker::Normalize(const NormalizedConstraint &NC) { return Res; } } + llvm_unreachable("Unknown ConstraintKind enum"); } void SubsumptionChecker::AddUniqueClauseToFormula(Formula &F, Clause C) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0069b08..6eaf7b9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11041,17 +11041,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, << CUDA().getConfigureFuncName(); Context.setcudaConfigureCallDecl(NewFD); } - - // Variadic functions, other than a *declaration* of printf, are not allowed - // in device-side CUDA code, unless someone passed - // -fcuda-allow-variadic-functions. - if (!getLangOpts().CUDAAllowVariadicFunctions && NewFD->isVariadic() && - (NewFD->hasAttr<CUDADeviceAttr>() || - NewFD->hasAttr<CUDAGlobalAttr>()) && - !(II && II->isStr("printf") && NewFD->isExternC() && - !D.isFunctionDefinition())) { - Diag(NewFD->getLocation(), diag::err_variadic_device_fn); - } } MarkUnusedFileScopedDecl(NewFD); diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 7ad7049..8471f02 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -2724,16 +2724,6 @@ Expr *GenerateReductionInitRecipeExpr(ASTContext &Context, return InitExpr; } -const Expr *StripOffBounds(const Expr *VarExpr) { - while (isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(VarExpr)) { - if (const auto *AS = dyn_cast<ArraySectionExpr>(VarExpr)) - VarExpr = AS->getBase()->IgnoreParenImpCasts(); - else if (const auto *Sub = dyn_cast<ArraySubscriptExpr>(VarExpr)) - VarExpr = Sub->getBase()->IgnoreParenImpCasts(); - } - return VarExpr; -} - VarDecl *CreateAllocaDecl(ASTContext &Ctx, DeclContext *DC, SourceLocation BeginLoc, IdentifierInfo *VarName, QualType VarTy) { @@ -2794,17 +2784,18 @@ OpenACCPrivateRecipe SemaOpenACC::CreatePrivateInitRecipe(const Expr *VarExpr) { OpenACCFirstPrivateRecipe SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) { - // TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe - VarExpr = StripOffBounds(VarExpr); - + // We don't strip bounds here, so that we are doing our recipe init at the + // 'lowest' possible level. Codegen is going to have to do its own 'looping'. if (!VarExpr || VarExpr->getType()->isDependentType()) return OpenACCFirstPrivateRecipe::Empty(); QualType VarTy = VarExpr->getType().getNonReferenceType().getUnqualifiedType(); - // TODO: OpenACC: for arrays/bounds versions, we're going to have to do a - // different initializer, but for now we can go ahead with this. + // Array sections are special, and we have to treat them that way. + if (const auto *ASE = + dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts())) + VarTy = ArraySectionExpr::getBaseOriginalType(ASE); VarDecl *AllocaDecl = CreateAllocaDecl( getASTContext(), SemaRef.getCurContext(), VarExpr->getBeginLoc(), diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index b870114..5657dfe 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -4413,14 +4413,23 @@ CompareImplicitConversionSequences(Sema &S, SourceLocation Loc, Result = CompareStandardConversionSequences(S, Loc, ICS1.Standard, ICS2.Standard); else if (ICS1.isUserDefined()) { + // With lazy template loading, it is possible to find non-canonical + // FunctionDecls, depending on when redecl chains are completed. Make sure + // to compare the canonical decls of conversion functions. This avoids + // ambiguity problems for templated conversion operators. + const FunctionDecl *ConvFunc1 = ICS1.UserDefined.ConversionFunction; + if (ConvFunc1) + ConvFunc1 = ConvFunc1->getCanonicalDecl(); + const FunctionDecl *ConvFunc2 = ICS2.UserDefined.ConversionFunction; + if (ConvFunc2) + ConvFunc2 = ConvFunc2->getCanonicalDecl(); // User-defined conversion sequence U1 is a better conversion // sequence than another user-defined conversion sequence U2 if // they contain the same user-defined conversion function or // constructor and if the second standard conversion sequence of // U1 is better than the second standard conversion sequence of // U2 (C++ 13.3.3.2p3). - if (ICS1.UserDefined.ConversionFunction == - ICS2.UserDefined.ConversionFunction) + if (ConvFunc1 == ConvFunc2) Result = CompareStandardConversionSequences(S, Loc, ICS1.UserDefined.After, ICS2.UserDefined.After); |