Diffstat (limited to 'clang')
63 files changed, 1879 insertions, 337 deletions
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 270b5d3..79dc0b2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -432,6 +432,9 @@ Bug Fixes to C++ Support - Fix an assertion failure when taking the address on a non-type template parameter argument of object type. (#GH151531) - Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409). +- Fix the result of ``__builtin_is_implicit_lifetime`` for types with a user-provided constructor. (#GH160610) +- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196) +- Fixed a crash in the pre-C++23 warning for attributes before a lambda declarator (#GH161070). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def index d3dff446..089a72b 100644 --- a/clang/include/clang/Basic/AMDGPUTypes.def +++ b/clang/include/clang/Basic/AMDGPUTypes.def @@ -21,6 +21,7 @@ #endif AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8) +AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_texture_t", AMDGPUTexture, AMDGPUTextureTy, 256, 256, 0) AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0) diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 9aad00b..b856ad1 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -34,6 +34,7 @@ // Q -> target builtin type, followed by a character to distinguish the builtin type // Qa -> AArch64 svcount_t builtin type. // Qb -> AMDGPU __amdgpu_buffer_rsrc_t builtin type. +// Qt -> AMDGPU __amdgpu_texture_t builtin type. // E -> ext_vector, followed by the number of elements and the base type. // X -> _Complex, followed by the base type. // Y -> ptrdiff_t diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 4d9e123..c724136 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1141,7 +1141,7 @@ def warn_cxx23_compat_binding_pack : Warning< def err_capture_default_first : Error< "capture default must be first">; def ext_decl_attrs_on_lambda : ExtWarn< - "%select{an attribute specifier sequence|%0}1 in this position " + "%select{an attribute specifier sequence|%1}0 in this position " "is a C++23 extension">, InGroup<CXX23AttrsOnLambda>; def ext_lambda_missing_parens : ExtWarn< "lambda without a parameter clause is a C++23 extension">, diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index bb39444..e1be08c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -683,8 +683,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [ //===----------------------------------------------------------------------===// defvar CIR_YieldableScopes = [ - "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp", - "SwitchOp", "TernaryOp", "WhileOp" + "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp", + "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp" ]; def CIR_YieldOp : CIR_Op<"yield", [ @@ -1776,7 +1776,9 @@ def CIR_GlobalLinkageKind : CIR_I32EnumAttr< // is upstreamed.
def CIR_GlobalOp : CIR_Op<"global", [ - DeclareOpInterfaceMethods<CIRGlobalValueInterface> + DeclareOpInterfaceMethods<RegionBranchOpInterface>, + DeclareOpInterfaceMethods<CIRGlobalValueInterface>, + NoRegionArguments ]> { let summary = "Declare or define a global variable"; let description = [{ @@ -1807,6 +1809,9 @@ def CIR_GlobalOp : CIR_Op<"global", [ UnitAttr:$dso_local, OptionalAttr<I64Attr>:$alignment); + let regions = (region MaxSizedRegion<1>:$ctorRegion, + MaxSizedRegion<1>:$dtorRegion); + let assemblyFormat = [{ ($sym_visibility^)? (`` $global_visibility^)? @@ -1815,24 +1820,34 @@ def CIR_GlobalOp : CIR_Op<"global", [ (`comdat` $comdat^)? (`dso_local` $dso_local^)? $sym_name - custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value) + custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value, + $ctorRegion, $dtorRegion) attr-dict }]; let extraClassDeclaration = [{ - bool isDeclaration() { return !getInitialValue(); } + bool isDeclaration() { + return !getInitialValue() && getCtorRegion().empty() && getDtorRegion().empty(); + } bool hasInitializer() { return !isDeclaration(); } }]; let skipDefaultBuilders = 1; - let builders = [OpBuilder<(ins - "llvm::StringRef":$sym_name, - "mlir::Type":$sym_type, - CArg<"bool", "false">:$isConstant, - // CIR defaults to external linkage. - CArg<"cir::GlobalLinkageKind", - "cir::GlobalLinkageKind::ExternalLinkage">:$linkage)>]; + let builders = [ + OpBuilder<(ins + "llvm::StringRef":$sym_name, + "mlir::Type":$sym_type, + CArg<"bool", "false">:$isConstant, + // CIR defaults to external linkage. + CArg<"cir::GlobalLinkageKind", + "cir::GlobalLinkageKind::ExternalLinkage">:$linkage, + CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>", + "nullptr">:$ctorBuilder, + CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>", + "nullptr">:$dtorBuilder) + > + ]; let hasVerifier = 1; diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 0fac1b2..7e59989 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -208,6 +208,7 @@ struct MissingFeatures { static bool dataLayoutTypeAllocSize() { return false; } static bool dataLayoutTypeStoreSize() { return false; } static bool deferredCXXGlobalInit() { return false; } + static bool deleteArray() { return false; } static bool devirtualizeMemberFunction() { return false; } static bool ehCleanupFlags() { return false; } static bool ehCleanupScope() { return false; } @@ -219,6 +220,7 @@ struct MissingFeatures { static bool emitCondLikelihoodViaExpectIntrinsic() { return false; } static bool emitLifetimeMarkers() { return false; } static bool emitLValueAlignmentAssumption() { return false; } + static bool emitNullCheckForDeleteCalls() { return false; } static bool emitNullabilityCheck() { return false; } static bool emitTypeCheck() { return false; } static bool emitTypeMetadataCodeForVCall() { return false; } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 61dd330..0fd0e7e 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12590,6 +12590,10 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, Type = Context.AMDGPUBufferRsrcTy; break; } + case 't': { + Type = Context.AMDGPUTextureTy; + break; + } default: llvm_unreachable("Unexpected target builtin type"); } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 891344d..a2e97fc 100644 --- 
a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1294,95 +1294,6 @@ static bool interp__builtin_assume_aligned(InterpState &S, CodePtr OpPC, return true; } -static bool interp__builtin_ia32_bextr(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Index = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - uint64_t Shift = Index.extractBitsAsZExtValue(8, 0); - uint64_t Length = Index.extractBitsAsZExtValue(8, 8); - Length = Length > BitWidth ? BitWidth : Length; - - // Handle out of bounds cases. - if (Length == 0 || Shift >= BitWidth) { - pushInteger(S, 0, Call->getType()); - return true; - } - - uint64_t Result = Val.getZExtValue() >> Shift; - Result &= llvm::maskTrailingOnes<uint64_t>(Length); - pushInteger(S, Result, Call->getType()); - return true; -} - -static bool interp__builtin_ia32_bzhi(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - QualType CallType = Call->getType(); - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType() || - !CallType->isIntegerType()) - return false; - - APSInt Idx = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - uint64_t Index = Idx.extractBitsAsZExtValue(8, 0); - - if (Index < BitWidth) - Val.clearHighBits(BitWidth - Index); - - pushInteger(S, Val, CallType); - return true; -} - -static bool interp__builtin_ia32_pdep(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Mask = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - APInt Result = APInt::getZero(BitWidth); - for (unsigned I = 0, P = 0; I != BitWidth; ++I) { - if (Mask[I]) - Result.setBitVal(I, Val[P++]); - } - pushInteger(S, std::move(Result), Call->getType()); - return true; -} - -static bool interp__builtin_ia32_pext(InterpState &S, CodePtr OpPC, - const InterpFrame *Frame, - const CallExpr *Call) { - if (Call->getNumArgs() != 2 || !Call->getArg(0)->getType()->isIntegerType() || - !Call->getArg(1)->getType()->isIntegerType()) - return false; - - APSInt Mask = popToAPSInt(S, Call->getArg(1)); - APSInt Val = popToAPSInt(S, Call->getArg(0)); - - unsigned BitWidth = Val.getBitWidth(); - APInt Result = APInt::getZero(BitWidth); - for (unsigned I = 0, P = 0; I != BitWidth; ++I) { - if (Mask[I]) - Result.setBitVal(P++, Val[I]); - } - pushInteger(S, std::move(Result), Call->getType()); - return true; -} - /// (CarryIn, LHS, RHS, Result) static bool interp__builtin_ia32_addcarry_subborrow(InterpState &S, CodePtr OpPC, @@ -3275,11 +3186,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_bextr_u64: case clang::X86::BI__builtin_ia32_bextri_u32: case clang::X86::BI__builtin_ia32_bextri_u64: - return interp__builtin_ia32_bextr(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) { + unsigned BitWidth = Val.getBitWidth(); + 
uint64_t Shift = Idx.extractBitsAsZExtValue(8, 0); + uint64_t Length = Idx.extractBitsAsZExtValue(8, 8); + if (Length > BitWidth) { + Length = BitWidth; + } + + // Handle out of bounds cases. + if (Length == 0 || Shift >= BitWidth) + return APInt(BitWidth, 0); + + uint64_t Result = Val.getZExtValue() >> Shift; + Result &= llvm::maskTrailingOnes<uint64_t>(Length); + return APInt(BitWidth, Result); + }); case clang::X86::BI__builtin_ia32_bzhi_si: case clang::X86::BI__builtin_ia32_bzhi_di: - return interp__builtin_ia32_bzhi(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Idx) { + unsigned BitWidth = Val.getBitWidth(); + uint64_t Index = Idx.extractBitsAsZExtValue(8, 0); + APSInt Result = Val; + + if (Index < BitWidth) + Result.clearHighBits(BitWidth - Index); + + return Result; + }); case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: @@ -3299,11 +3236,33 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pdep_si: case clang::X86::BI__builtin_ia32_pdep_di: - return interp__builtin_ia32_pdep(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) { + unsigned BitWidth = Val.getBitWidth(); + APInt Result = APInt::getZero(BitWidth); + + for (unsigned I = 0, P = 0; I != BitWidth; ++I) { + if (Mask[I]) + Result.setBitVal(I, Val[P++]); + } + + return Result; + }); case clang::X86::BI__builtin_ia32_pext_si: case clang::X86::BI__builtin_ia32_pext_di: - return interp__builtin_ia32_pext(S, OpPC, Frame, Call); + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &Val, const APSInt &Mask) { + unsigned BitWidth = Val.getBitWidth(); + APInt Result = APInt::getZero(BitWidth); + + for (unsigned I = 0, P = 0; I != BitWidth; ++I) { + if (Mask[I]) + Result.setBitVal(P++, Val[I]); + } + + return Result; + }); case clang::X86::BI__builtin_ia32_addcarryx_u32: case clang::X86::BI__builtin_ia32_addcarryx_u64: diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index af89b66..cd738ce 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -262,6 +262,7 @@ public: case Storage::Typeid: return false; } + llvm_unreachable("Unknown clang::interp::Storage enum"); } /// Checks if the pointer is live. bool isLive() const { diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index cb8fe6c..9d12a13 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -951,28 +951,37 @@ Address CIRGenFunction::getAddressOfBaseClass( bool nullCheckValue, SourceLocation loc) { assert(!path.empty() && "Base path should not be empty!"); + CastExpr::path_const_iterator start = path.begin(); + const CXXRecordDecl *vBase = nullptr; + if ((*path.begin())->isVirtual()) { - // The implementation here is actually complete, but let's flag this - // as an error until the rest of the virtual base class support is in place. - cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base"); - return Address::invalid(); + vBase = (*start)->getType()->castAsCXXRecordDecl(); + ++start; } // Compute the static offset of the ultimate destination within its // allocating subobject (the virtual base, if there is one, or else // the "complete" object that we see). 
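For orientation, here is a minimal sketch of the situation this code handles (hypothetical types, not taken from the patch): a cast to a virtual base normally requires a runtime offset loaded through the vtable, but when the most-derived type is known statically the offset is a compile-time constant.

    struct Base { int b; };
    struct Mid : virtual Base { int m; };
    struct Derived final : Mid { int d; };

    // Derived is 'final', so its complete layout is known statically and the
    // virtual-base offset can be folded into a constant adjustment.
    Base *toBase(Derived *d) { return d; }

This corresponds to the devirtualization case in the code below, where a final derived class lets the virtual step be folded into nonVirtualOffset.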
- CharUnits nonVirtualOffset = - cgm.computeNonVirtualBaseClassOffset(derived, path); + CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset( + vBase ? vBase : derived, {start, path.end()}); + + // If there's a virtual step, we can sometimes "devirtualize" it. + // For now, that's limited to when the derived type is final. + // TODO: "devirtualize" this for accesses to known-complete objects. + if (vBase && derived->hasAttr<FinalAttr>()) { + const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived); + CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase); + nonVirtualOffset += vBaseOffset; + vBase = nullptr; // we no longer have a virtual step + } // Get the base pointer type. mlir::Type baseValueTy = convertType((path.end()[-1])->getType()); assert(!cir::MissingFeatures::addressSpace()); - // The if statement here is redundant now, but it will be needed when we add - // support for virtual base classes. // If there is no virtual base, use cir.base_class_addr. It takes care of // the adjustment and the null pointer check. - if (nonVirtualOffset.isZero()) { + if (nonVirtualOffset.isZero() && !vBase) { assert(!cir::MissingFeatures::sanitizers()); return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0, /*assumeNotNull=*/true); @@ -980,10 +989,17 @@ Address CIRGenFunction::getAddressOfBaseClass( assert(!cir::MissingFeatures::sanitizers()); - // Apply the offset - value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, - nonVirtualOffset.getQuantity(), - /*assumeNotNull=*/true); + // Compute the virtual offset. + mlir::Value virtualOffset = nullptr; + if (vBase) { + virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset( + getLoc(loc), *this, value, derived, vBase); + } + + // Apply both offsets. + value = applyNonVirtualAndVirtualOffset( + getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived, + vBase, baseValueTy, not nullCheckValue); // Cast to the destination type. value = value.withElementType(builder, baseValueTy); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 1f7e3dd..83208bf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -210,6 +210,60 @@ RValue CIRGenFunction::emitCXXMemberOrOperatorCall( return emitCall(fnInfo, callee, returnValue, args, nullptr, loc); } +namespace { +/// The parameters to pass to a usual operator delete. +struct UsualDeleteParams { + TypeAwareAllocationMode typeAwareDelete = TypeAwareAllocationMode::No; + bool destroyingDelete = false; + bool size = false; + AlignedAllocationMode alignment = AlignedAllocationMode::No; +}; +} // namespace + +// FIXME(cir): this should be shared with LLVM codegen +static UsualDeleteParams getUsualDeleteParams(const FunctionDecl *fd) { + UsualDeleteParams params; + + const FunctionProtoType *fpt = fd->getType()->castAs<FunctionProtoType>(); + auto ai = fpt->param_type_begin(), ae = fpt->param_type_end(); + + if (fd->isTypeAwareOperatorNewOrDelete()) { + params.typeAwareDelete = TypeAwareAllocationMode::Yes; + assert(ai != ae); + ++ai; + } + + // The first argument after the type-identity parameter (if any) is + // always a void* (or C* for a destroying operator delete for class + // type C). + ++ai; + + // The next parameter may be a std::destroying_delete_t. + if (fd->isDestroyingOperatorDelete()) { + params.destroyingDelete = true; + assert(ai != ae); + ++ai; + } + + // Figure out what other parameters we should be implicitly passing. 
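For reference, these are the parameter shapes of usual (non-placement) deallocation functions that this logic distinguishes; the declarations are hypothetical illustrations, not part of the patch:

    void operator delete(void *p) noexcept;                       // neither size nor alignment
    void operator delete(void *p, std::size_t sz) noexcept;       // params.size = true
    void operator delete(void *p, std::align_val_t al) noexcept;  // params.alignment = Yes
    void operator delete(void *p, std::size_t sz,
                         std::align_val_t al) noexcept;           // both

The checks that follow walk the remaining parameter types in exactly this canonical order.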
+ if (ai != ae && (*ai)->isIntegerType()) { + params.size = true; + ++ai; + } else { + assert(!isTypeAwareAllocation(params.typeAwareDelete)); + } + + if (ai != ae && (*ai)->isAlignValT()) { + params.alignment = AlignedAllocationMode::Yes; + ++ai; + } else { + assert(!isTypeAwareAllocation(params.typeAwareDelete)); + } + + assert(ai == ae && "unexpected usual deallocation function parameter"); + return params; +} + static mlir::Value emitCXXNewAllocSize(CIRGenFunction &cgf, const CXXNewExpr *e, unsigned minElements, mlir::Value &numElements, @@ -332,6 +386,117 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf, return rv; } +namespace { +/// Calls the given 'operator delete' on a single object. +struct CallObjectDelete final : EHScopeStack::Cleanup { + mlir::Value ptr; + const FunctionDecl *operatorDelete; + QualType elementType; + + CallObjectDelete(mlir::Value ptr, const FunctionDecl *operatorDelete, + QualType elementType) + : ptr(ptr), operatorDelete(operatorDelete), elementType(elementType) {} + + void emit(CIRGenFunction &cgf) override { + cgf.emitDeleteCall(operatorDelete, ptr, elementType); + } + + // This is a placeholder until EHCleanupScope is implemented. + size_t getSize() const override { + assert(!cir::MissingFeatures::ehCleanupScope()); + return sizeof(CallObjectDelete); + } +}; +} // namespace + +/// Emit the code for deleting a single object. +static void emitObjectDelete(CIRGenFunction &cgf, const CXXDeleteExpr *de, + Address ptr, QualType elementType) { + // C++11 [expr.delete]p3: + // If the static type of the object to be deleted is different from its + // dynamic type, the static type shall be a base class of the dynamic type + // of the object to be deleted and the static type shall have a virtual + // destructor or the behavior is undefined. + assert(!cir::MissingFeatures::emitTypeCheck()); + + const FunctionDecl *operatorDelete = de->getOperatorDelete(); + assert(!operatorDelete->isDestroyingOperatorDelete()); + + // Find the destructor for the type, if applicable. If the + // destructor is virtual, we'll just emit the vcall and return. + const CXXDestructorDecl *dtor = nullptr; + if (const auto *rd = elementType->getAsCXXRecordDecl()) { + if (rd->hasDefinition() && !rd->hasTrivialDestructor()) { + dtor = rd->getDestructor(); + + if (dtor->isVirtual()) { + cgf.cgm.errorNYI(de->getSourceRange(), + "emitObjectDelete: virtual destructor"); + } + } + } + + // Make sure that we call delete even if the dtor throws. + // This doesn't have to be a conditional cleanup because we're going + // to pop it off in a second. + cgf.ehStack.pushCleanup<CallObjectDelete>( + NormalAndEHCleanup, ptr.getPointer(), operatorDelete, elementType); + + if (dtor) { + cgf.emitCXXDestructorCall(dtor, Dtor_Complete, + /*ForVirtualBase=*/false, + /*Delegating=*/false, ptr, elementType); + } else if (elementType.getObjCLifetime()) { + assert(!cir::MissingFeatures::objCLifetime()); + cgf.cgm.errorNYI(de->getSourceRange(), "emitObjectDelete: ObjCLifetime"); + } + + // In traditional LLVM codegen, null checks are emitted to save a delete call. + // In CIR we optimize for size by default, so the null check should be added + // by this function's callers. + assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls()); + + cgf.popCleanupBlock(); +} + +void CIRGenFunction::emitCXXDeleteExpr(const CXXDeleteExpr *e) { + const Expr *arg = e->getArgument(); + Address ptr = emitPointerWithAlignment(arg); + + // Null check the pointer.
+ // + // We could avoid this null check if we can determine that the object + // destruction is trivial and doesn't require an array cookie; we can + // unconditionally perform the operator delete call in that case. For now, we + // assume that deleted pointers are null rarely enough that it's better to + // keep the branch. This might be worth revisiting for a -O0 code size win. + // + // CIR note: we emit the code-size-friendly form by default for now, as + // mentioned in `emitObjectDelete`. + assert(!cir::MissingFeatures::emitNullCheckForDeleteCalls()); + QualType deleteTy = e->getDestroyedType(); + + // A destroying operator delete overrides the entire operation of the + // delete expression. + if (e->getOperatorDelete()->isDestroyingOperatorDelete()) { + cgm.errorNYI(e->getSourceRange(), + "emitCXXDeleteExpr: destroying operator delete"); + return; + } + + // We might be deleting a pointer to an array. + deleteTy = getContext().getBaseElementType(deleteTy); + ptr = ptr.withElementType(builder, convertTypeForMem(deleteTy)); + + if (e->isArrayForm()) { + assert(!cir::MissingFeatures::deleteArray()); + cgm.errorNYI(e->getSourceRange(), "emitCXXDeleteExpr: array delete"); + return; + } else { + emitObjectDelete(*this, e, ptr, deleteTy); + } +} + mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { // The element type being allocated. QualType allocType = getContext().getBaseElementType(e->getAllocatedType()); @@ -443,3 +608,53 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *e) { allocSizeWithoutCookie); return result.getPointer(); } + +void CIRGenFunction::emitDeleteCall(const FunctionDecl *deleteFD, + mlir::Value ptr, QualType deleteTy) { + assert(!cir::MissingFeatures::deleteArray()); + + const auto *deleteFTy = deleteFD->getType()->castAs<FunctionProtoType>(); + CallArgList deleteArgs; + + UsualDeleteParams params = getUsualDeleteParams(deleteFD); + auto paramTypeIt = deleteFTy->param_type_begin(); + + // Pass the std::type_identity tag if present. + if (isTypeAwareAllocation(params.typeAwareDelete)) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: type aware delete"); + + // Pass the pointer itself. + QualType argTy = *paramTypeIt++; + mlir::Value deletePtr = + builder.createBitcast(ptr.getLoc(), ptr, convertType(argTy)); + deleteArgs.add(RValue::get(deletePtr), argTy); + + // Pass the std::destroying_delete tag if present. + if (params.destroyingDelete) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: destroying delete"); + + // Pass the size if the delete function has a size_t parameter. + if (params.size) { + QualType sizeType = *paramTypeIt++; + CharUnits deleteTypeSize = getContext().getTypeSizeInChars(deleteTy); + assert(mlir::isa<cir::IntType>(convertType(sizeType)) && + "expected cir::IntType"); + cir::ConstantOp size = builder.getConstInt( + *currSrcLoc, convertType(sizeType), deleteTypeSize.getQuantity()); + + deleteArgs.add(RValue::get(size), sizeType); + } + + // Pass the alignment if the delete function has an align_val_t parameter. + if (isAlignedAllocation(params.alignment)) + cgm.errorNYI(deleteFD->getSourceRange(), + "emitDeleteCall: aligned allocation"); + + assert(paramTypeIt == deleteFTy->param_type_end() && + "unknown parameter to usual delete function"); + + // Emit the call to delete.
+ emitNewDeleteCall(*this, deleteFD, deleteFTy, deleteArgs); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index bd09d78..f4bbced 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -676,6 +676,10 @@ public: mlir::Value VisitRealImag(const UnaryOperator *e, QualType promotionType = QualType()); + mlir::Value VisitUnaryExtension(const UnaryOperator *e) { + return Visit(e->getSubExpr()); + } + mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) { CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die); return Visit(die->getExpr()); @@ -687,6 +691,10 @@ public: mlir::Value VisitCXXNewExpr(const CXXNewExpr *e) { return cgf.emitCXXNewExpr(e); } + mlir::Value VisitCXXDeleteExpr(const CXXDeleteExpr *e) { + cgf.emitCXXDeleteExpr(e); + return {}; + } mlir::Value VisitCXXThrowExpr(const CXXThrowExpr *e) { cgf.emitCXXThrowExpr(e); @@ -1274,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e, } else if (const auto *uo = dyn_cast<UnaryOperator>(e)) { switch (uo->getOpcode()) { case UO_Imag: - cgf.cgm.errorNYI(e->getSourceRange(), - "ScalarExprEmitter::emitPromoted unary imag"); - return {}; case UO_Real: return VisitRealImag(uo, promotionType); case UO_Minus: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 166435f..ef07db3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1197,6 +1197,8 @@ public: bool delegating, Address thisAddr, CallArgList &args, clang::SourceLocation loc); + void emitCXXDeleteExpr(const CXXDeleteExpr *e); + void emitCXXDestructorCall(const CXXDestructorDecl *dd, CXXDtorType type, bool forVirtualBase, bool delegating, Address thisAddr, QualType thisTy); @@ -1244,6 +1246,9 @@ public: void emitDelegatingCXXConstructorCall(const CXXConstructorDecl *ctor, const FunctionArgList &args); + void emitDeleteCall(const FunctionDecl *deleteFD, mlir::Value ptr, + QualType deleteTy); + mlir::LogicalResult emitDoStmt(const clang::DoStmt &s); /// Emit an expression as an initializer for an object (variable, field, etc.) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index eef23a0..c977ff9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -119,6 +119,19 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, cir::OptInfoAttr::get(&mlirContext, cgo.OptimizationLevel, cgo.OptimizeSize)); + // Set the module name to be the name of the main file. TranslationUnitDecl + // often contains invalid source locations and isn't a reliable source for the + // module location. 
+ FileID mainFileId = astContext.getSourceManager().getMainFileID(); + const FileEntry &mainFile = + *astContext.getSourceManager().getFileEntryForID(mainFileId); + StringRef path = mainFile.tryGetRealPathName(); + if (!path.empty()) { + theModule.setSymName(path); + theModule->setLoc(mlir::FileLineColLoc::get(&mlirContext, path, + /*line=*/0, + /*column=*/0)); + } } CIRGenModule::~CIRGenModule() = default; diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index e842892..644c383 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::OMPSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPForDirectiveClass: case Stmt::OMPForSimdDirectiveClass: case Stmt::OMPSectionsDirectiveClass: diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 58ef500..fb87036 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1355,9 +1355,11 @@ mlir::LogicalResult cir::GlobalOp::verify() { return success(); } -void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, - llvm::StringRef sym_name, mlir::Type sym_type, - bool isConstant, cir::GlobalLinkageKind linkage) { +void cir::GlobalOp::build( + OpBuilder &odsBuilder, OperationState &odsState, llvm::StringRef sym_name, + mlir::Type sym_type, bool isConstant, cir::GlobalLinkageKind linkage, + function_ref<void(OpBuilder &, Location)> ctorBuilder, + function_ref<void(OpBuilder &, Location)> dtorBuilder) { odsState.addAttribute(getSymNameAttrName(odsState.name), odsBuilder.getStringAttr(sym_name)); odsState.addAttribute(getSymTypeAttrName(odsState.name), @@ -1370,26 +1372,88 @@ void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage); odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr); + Region *ctorRegion = odsState.addRegion(); + if (ctorBuilder) { + odsBuilder.createBlock(ctorRegion); + ctorBuilder(odsBuilder, odsState.location); + } + + Region *dtorRegion = odsState.addRegion(); + if (dtorBuilder) { + odsBuilder.createBlock(dtorRegion); + dtorBuilder(odsBuilder, odsState.location); + } + odsState.addAttribute(getGlobalVisibilityAttrName(odsState.name), cir::VisibilityAttr::get(odsBuilder.getContext())); } +/// Given a region branch point (the parent operation itself, or one of its +/// regions), return the successor regions. These are the regions that may be +/// selected next during the flow of control. +void cir::GlobalOp::getSuccessorRegions( + mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) { + // The `ctor` and `dtor` regions always branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // Don't consider the ctor region if it is empty. + Region *ctorRegion = &this->getCtorRegion(); + if (ctorRegion->empty()) + ctorRegion = nullptr; + + // Don't consider the dtor region if it is empty. + Region *dtorRegion = &this->getDtorRegion(); + if (dtorRegion->empty()) + dtorRegion = nullptr; + + // When entering the op, any non-empty region may be executed.
+ if (ctorRegion) + regions.push_back(RegionSuccessor(ctorRegion)); + if (dtorRegion) + regions.push_back(RegionSuccessor(dtorRegion)); +} + static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op, - TypeAttr type, - Attribute initAttr) { + TypeAttr type, Attribute initAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { + auto printType = [&]() { p << ": " << type; }; if (!op.isDeclaration()) { p << "= "; - // This also prints the type... - if (initAttr) - printConstant(p, initAttr); + if (!ctorRegion.empty()) { + p << "ctor "; + printType(); + p << " "; + p.printRegion(ctorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } else { + // This also prints the type... + if (initAttr) + printConstant(p, initAttr); + } + + if (!dtorRegion.empty()) { + p << " dtor "; + p.printRegion(dtorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } } else { - p << ": " << type; + printType(); } } -static ParseResult -parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, - Attribute &initialValueAttr) { +static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, + TypeAttr &typeAttr, + Attribute &initialValueAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { mlir::Type opTy; if (parser.parseOptionalEqual().failed()) { // Absence of equal means a declaration, so we need to parse the type. @@ -1397,16 +1461,38 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, if (parser.parseColonType(opTy)) return failure(); } else { - // Parse constant with initializer, examples: - // cir.global @y = #cir.fp<1.250000e+00> : !cir.double - // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>> - if (parseConstantValue(parser, initialValueAttr).failed()) - return failure(); + // Parse constructor, example: + // cir.global @rgb = ctor : type { ... } + if (!parser.parseOptionalKeyword("ctor")) { + if (parser.parseColonType(opTy)) + return failure(); + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed()) + return failure(); + } else { + // Parse constant with initializer, examples: + // cir.global @y = 3.400000e+00 : f32 + // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>> + if (parseConstantValue(parser, initialValueAttr).failed()) + return failure(); + + assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && + "Non-typed attrs shouldn't appear here."); + auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); + opTy = typedAttr.getType(); + } - assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && - "Non-typed attrs shouldn't appear here."); - auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); - opTy = typedAttr.getType(); + // Parse destructor, example: + // dtor { ...
} + if (!parser.parseOptionalKeyword("dtor")) { + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed()) + return failure(); + } } typeAttr = TypeAttr::get(opTy); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 57db20f7..64f1917 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (std::optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( - [Options](ModulePassManager &MPM, OptimizationLevel Level) { - MPM.addPass(GCOVProfilerPass(*Options)); + [this, Options](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass( + GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr())); }); if (std::optional<InstrProfOptions> Options = getInstrProfOptions(CodeGenOpts, LangOpts)) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 12c7d48..fee6bc0 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -26,6 +26,7 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" +#include "clang/AST/LambdaCapture.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/VTableBuilder.h" @@ -1903,46 +1904,61 @@ CGDebugInfo::createInlinedSubprogram(StringRef FuncName, return SP; } +llvm::StringRef +CGDebugInfo::GetLambdaCaptureName(const LambdaCapture &Capture) { + if (Capture.capturesThis()) + return CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this"; + + assert(Capture.capturesVariable()); + + const ValueDecl *CaptureDecl = Capture.getCapturedVar(); + assert(CaptureDecl && "Expected valid decl for captured variable."); + + return CaptureDecl->getName(); +} + void CGDebugInfo::CollectRecordLambdaFields( const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements, llvm::DIType *RecordTy) { // For C++11 Lambdas a Field will be the same as a Capture, but the Capture // has the name and the location of the variable so we should iterate over // both concurrently. 
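As a sketch of the field/capture correspondence this loop relies on (hypothetical source, not from the patch):

    struct S {
      void f() {
        int x = 0;
        auto l = [this, x] { return x; };  // closure type gets two unnamed fields
      }
    };

The closure type's data members are unnamed; the parallel LambdaCapture list supplies the display names ("this", or "__this" for CodeView, and "x") and the source locations used to build the debug-info members.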
- const ASTRecordLayout &layout = CGM.getContext().getASTRecordLayout(CXXDecl); RecordDecl::field_iterator Field = CXXDecl->field_begin(); unsigned fieldno = 0; for (CXXRecordDecl::capture_const_iterator I = CXXDecl->captures_begin(), E = CXXDecl->captures_end(); I != E; ++I, ++Field, ++fieldno) { - const LambdaCapture &C = *I; - if (C.capturesVariable()) { - SourceLocation Loc = C.getLocation(); - assert(!Field->isBitField() && "lambdas don't have bitfield members!"); - ValueDecl *V = C.getCapturedVar(); - StringRef VName = V->getName(); - llvm::DIFile *VUnit = getOrCreateFile(Loc); - auto Align = getDeclAlignIfRequired(V, CGM.getContext()); - llvm::DIType *FieldType = createFieldType( - VName, Field->getType(), Loc, Field->getAccess(), - layout.getFieldOffset(fieldno), Align, VUnit, RecordTy, CXXDecl); - elements.push_back(FieldType); - } else if (C.capturesThis()) { + const LambdaCapture &Capture = *I; + const uint64_t FieldOffset = + CGM.getContext().getASTRecordLayout(CXXDecl).getFieldOffset(fieldno); + + assert(!Field->isBitField() && "lambdas don't have bitfield members!"); + + SourceLocation Loc; + uint32_t Align = 0; + + if (Capture.capturesThis()) { // TODO: Need to handle 'this' in some way by probably renaming the // this of the lambda class and having a field member of 'this' or // by using AT_object_pointer for the function and having that be // used as 'this' for semantic references. - FieldDecl *f = *Field; - llvm::DIFile *VUnit = getOrCreateFile(f->getLocation()); - QualType type = f->getType(); - StringRef ThisName = - CGM.getCodeGenOpts().EmitCodeView ? "__this" : "this"; - llvm::DIType *fieldType = createFieldType( - ThisName, type, f->getLocation(), f->getAccess(), - layout.getFieldOffset(fieldno), VUnit, RecordTy, CXXDecl); - - elements.push_back(fieldType); + Loc = Field->getLocation(); + } else if (Capture.capturesVariable()) { + Loc = Capture.getLocation(); + + const ValueDecl *CaptureDecl = Capture.getCapturedVar(); + assert(CaptureDecl && "Expected valid decl for captured variable."); + + Align = getDeclAlignIfRequired(CaptureDecl, CGM.getContext()); + } else { + continue; } + + llvm::DIFile *VUnit = getOrCreateFile(Loc); + + elements.push_back(createFieldType( + GetLambdaCaptureName(Capture), Field->getType(), Loc, + Field->getAccess(), FieldOffset, Align, VUnit, RecordTy, CXXDecl)); } } diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index f860773..78c3eb9 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -397,6 +397,7 @@ private: void CollectRecordFields(const RecordDecl *Decl, llvm::DIFile *F, SmallVectorImpl<llvm::Metadata *> &E, llvm::DICompositeType *RecordTy); + llvm::StringRef GetLambdaCaptureName(const LambdaCapture &Capture); /// If the C++ class has vtable info then insert appropriate debug /// info entry in EltTys vector. diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 07cf08c..6596ec0 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, {Src0, Src1}); } +static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) { + if (AMDGCNScope == "agent") + return "device"; + if (AMDGCNScope == "wavefront") + return "subgroup"; + return AMDGCNScope; +} + // For processing memory ordering and memory scope arguments of various // amdgcn builtins. 
-// \p Order takes a C++11 comptabile memory-ordering specifier and converts +// \p Order takes a C++11 compatible memory-ordering specifier and converts // it into LLVM's memory ordering specifier using atomic C ABI, and writes // to \p AO. \p Scope takes a const char * and converts it into AMDGCN // specific SyncScopeID and writes it to \p SSID. @@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, // Some of the atomic builtins take the scope as a string name. StringRef scp; if (llvm::getConstantStringInfo(Scope, scp)) { + if (getTarget().getTriple().isSPIRV()) + scp = mapScopeToSPIRV(scp); SSID = getLLVMContext().getOrInsertSyncScopeID(scp); return; } @@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, SSID = llvm::SyncScope::System; break; case 1: // __MEMORY_SCOPE_DEVICE - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); break; case 2: // __MEMORY_SCOPE_WRKGRP SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup"); break; case 3: // __MEMORY_SCOPE_WVFRNT - SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); break; case 4: // __MEMORY_SCOPE_SINGLE SSID = llvm::SyncScope::SingleThread; @@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // // The global/flat cases need to use agent scope to consistently produce // the native instruction instead of a cmpxchg expansion. - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f110dba..85a13357 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6613,6 +6613,9 @@ std::string Driver::GetStdModuleManifestPath(const Compilation &C, const ToolChain &TC) const { std::string error = "<NOT PRESENT>"; + if (C.getArgs().hasArg(options::OPT_nostdlib)) + return error; + switch (TC.GetCXXStdlibType(C.getArgs())) { case ToolChain::CST_Libcxx: { auto evaluate = [&](const char *library) -> std::optional<std::string> { diff --git a/clang/lib/Frontend/ModuleDependencyCollector.cpp b/clang/lib/Frontend/ModuleDependencyCollector.cpp index 3b363f9..ff37065 100644 --- a/clang/lib/Frontend/ModuleDependencyCollector.cpp +++ b/clang/lib/Frontend/ModuleDependencyCollector.cpp @@ -91,10 +91,10 @@ void ModuleDependencyCollector::attachToPreprocessor(Preprocessor &PP) { std::make_unique<ModuleDependencyMMCallbacks>(*this)); } -static bool isCaseSensitivePath(StringRef Path) { +static bool isCaseSensitivePath(llvm::vfs::FileSystem &VFS, StringRef Path) { SmallString<256> TmpDest = Path, UpperDest, RealDest; // Remove component traversals, links, etc. - if (llvm::sys::fs::real_path(Path, TmpDest)) + if (VFS.getRealPath(Path, TmpDest)) return true; // Current default value in vfs.yaml Path = TmpDest; @@ -104,7 +104,7 @@ static bool isCaseSensitivePath(StringRef Path) { // already expects when sensitivity isn't setup. 
for (auto &C : Path) UpperDest.push_back(toUppercase(C)); - if (!llvm::sys::fs::real_path(UpperDest, RealDest) && Path == RealDest) + if (!VFS.getRealPath(UpperDest, RealDest) && Path == RealDest) return false; return true; } @@ -121,7 +121,8 @@ void ModuleDependencyCollector::writeFileMap() { // Explicitly set case sensitivity for the YAML writer. For that, find out // the sensitivity at the path where the headers all collected to. - VFSWriter.setCaseSensitivity(isCaseSensitivePath(VFSDir)); + VFSWriter.setCaseSensitivity( + isCaseSensitivePath(Canonicalizer.getFileSystem(), VFSDir)); // Do not rely on real path names when executing the crash reproducer scripts // since we only want to actually use the files we have on the VFS cache. @@ -153,7 +154,7 @@ std::error_code ModuleDependencyCollector::copyToRoot(StringRef Src, } else { // When collecting entries from input vfsoverlays, copy the external // contents into the cache but still map from the source. - if (!fs::exists(Dst)) + if (!Canonicalizer.getFileSystem().exists(Dst)) return std::error_code(); path::append(CacheDst, Dst); Paths.CopyFrom = Dst; diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index a7f7099..d6ba19a 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2311,10 +2311,9 @@ _mm256_cvttps_epi32(__m256 __a) /// \param __a /// A 256-bit vector of [4 x double]. /// \returns A 64 bit double containing the first element of the input vector. -static __inline double __DEFAULT_FN_ATTRS -_mm256_cvtsd_f64(__m256d __a) -{ - return __a[0]; +static __inline double __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsd_f64(__m256d __a) { + return __a[0]; } /// Returns the first element of the input vector of [8 x i32]. @@ -2327,11 +2326,10 @@ _mm256_cvtsd_f64(__m256d __a) /// \param __a /// A 256-bit vector of [8 x i32]. /// \returns A 32 bit integer containing the first element of the input vector. -static __inline int __DEFAULT_FN_ATTRS -_mm256_cvtsi256_si32(__m256i __a) -{ - __v8si __b = (__v8si)__a; - return __b[0]; +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtsi256_si32(__m256i __a) { + __v8si __b = (__v8si)__a; + return __b[0]; } /// Returns the first element of the input vector of [8 x float]. @@ -2344,10 +2342,9 @@ _mm256_cvtsi256_si32(__m256i __a) /// \param __a /// A 256-bit vector of [8 x float]. /// \returns A 32 bit float containing the first element of the input vector. -static __inline float __DEFAULT_FN_ATTRS -_mm256_cvtss_f32(__m256 __a) -{ - return __a[0]; +static __inline float __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_cvtss_f32(__m256 __a) { + return __a[0]; } /* Vector replicate */ diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 8605ba2..a2c6957 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1299,7 +1299,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( Diag(Tok, getLangOpts().CPlusPlus23 ? 
diag::warn_cxx20_compat_decl_attrs_on_lambda : diag::ext_decl_attrs_on_lambda) - << Tok.getIdentifierInfo() << Tok.isRegularKeywordAttribute(); + << Tok.isRegularKeywordAttribute() << Tok.getIdentifierInfo(); MaybeParseCXX11Attributes(D); } diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp index 1b66d83..8606227 100644 --- a/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -983,10 +983,9 @@ static void DiagUninitUse(Sema &S, const VarDecl *VD, const UninitUse &Use, case UninitUse::AfterDecl: case UninitUse::AfterCall: S.Diag(VD->getLocation(), diag::warn_sometimes_uninit_var) - << VD->getDeclName() << IsCapturedByBlock - << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5) - << const_cast<DeclContext*>(VD->getLexicalDeclContext()) - << VD->getSourceRange(); + << VD->getDeclName() << IsCapturedByBlock + << (Use.getKind() == UninitUse::AfterDecl ? 4 : 5) + << VD->getLexicalDeclContext() << VD->getSourceRange(); S.Diag(Use.getUser()->getBeginLoc(), diag::note_uninit_var_use) << IsCapturedByBlock << Use.getUser()->getSourceRange(); return; diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index d238b79..dc6d232 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -193,7 +193,7 @@ DiagRecursiveConstraintEval(Sema &S, llvm::FoldingSetNodeID &ID, // Sema::InstantiatingTemplate::isAlreadyBeingInstantiated function. if (S.SatisfactionStackContains(Templ, ID)) { S.Diag(E->getExprLoc(), diag::err_constraint_depends_on_self) - << const_cast<Expr *>(E) << E->getSourceRange(); + << E << E->getSourceRange(); return true; } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3b267c1..3302bfc 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -20108,8 +20108,9 @@ static void DoMarkVarDeclReferenced( bool NeededForConstantEvaluation = isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr; - bool NeedDefinition = - OdrUse == OdrUseContext::Used || NeededForConstantEvaluation; + bool NeedDefinition = OdrUse == OdrUseContext::Used || + NeededForConstantEvaluation || + Var->getType()->isUndeducedType(); assert(!isa<VarTemplatePartialSpecializationDecl>(Var) && "Can't instantiate a partial template specialization."); diff --git a/clang/lib/Sema/SemaOpenACCAtomic.cpp b/clang/lib/Sema/SemaOpenACCAtomic.cpp index a9319dc..ad21129 100644 --- a/clang/lib/Sema/SemaOpenACCAtomic.cpp +++ b/clang/lib/Sema/SemaOpenACCAtomic.cpp @@ -454,9 +454,7 @@ class AtomicOperandChecker { // If nothing matches, error out. 
DiagnoseInvalidAtomic(BinInf->FoundExpr->getExprLoc(), SemaRef.PDiag(diag::note_acc_atomic_mismatch_operand) - << const_cast<Expr *>(AssignInf.LHS) - << const_cast<Expr *>(BinInf->LHS) - << const_cast<Expr *>(BinInf->RHS)); + << AssignInf.LHS << BinInf->LHS << BinInf->RHS); return IDACInfo::Fail(); } @@ -592,8 +590,7 @@ class AtomicOperandChecker { PartialDiagnostic PD = SemaRef.PDiag(diag::note_acc_atomic_mismatch_compound_operand) - << FirstKind << const_cast<Expr *>(FirstX) << SecondKind - << const_cast<Expr *>(SecondX); + << FirstKind << FirstX << SecondKind << SecondX; return DiagnoseInvalidAtomic(SecondX->getExprLoc(), PD); } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index f5feed6..0fa21e8 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2490,7 +2490,8 @@ VarDecl *SemaOpenMP::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, DSAStackTy::DSAVarData DVarTop = DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode()); if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind) && - (!VD || VD->hasLocalStorage() || !DVarTop.AppliedToPointee)) + (!VD || VD->hasLocalStorage() || + !(DVarTop.AppliedToPointee && DVarTop.CKind != OMPC_reduction))) return VD ? VD : cast<VarDecl>(DVarTop.PrivateCopy->getDecl()); // Threadprivate variables must not be captured. if (isOpenMPThreadPrivate(DVarTop.CKind)) diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp index c2427dcf..6c798d6 100644 --- a/clang/lib/Sema/SemaTypeTraits.cpp +++ b/clang/lib/Sema/SemaTypeTraits.cpp @@ -1163,13 +1163,16 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT, // - it has at least one trivial eligible constructor and a trivial, // non-deleted destructor. const CXXDestructorDecl *Dtor = RD->getDestructor(); - if (UnqualT->isAggregateType()) - if (Dtor && !Dtor->isUserProvided()) - return true; - if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) - if (RD->hasTrivialDefaultConstructor() || - RD->hasTrivialCopyConstructor() || RD->hasTrivialMoveConstructor()) - return true; + if (UnqualT->isAggregateType() && (!Dtor || !Dtor->isUserProvided())) + return true; + if (RD->hasTrivialDestructor() && (!Dtor || !Dtor->isDeleted())) { + for (CXXConstructorDecl *Ctr : RD->ctors()) { + if (Ctr->isIneligibleOrNotSelected() || Ctr->isDeleted()) + continue; + if (Ctr->isTrivial()) + return true; + } + } return false; } case UTT_IsIntangibleType: diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 36f316d..0ae784c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -672,6 +672,10 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, ProgramStateRef stateTrue, stateFalse; + if (!First.Expression->getType()->isAnyPointerType() || + !Second.Expression->getType()->isAnyPointerType()) + return state; + // Assume different address spaces cannot overlap. 
if (First.Expression->getType()->getPointeeType().getAddressSpace() != Second.Expression->getType()->getPointeeType().getAddressSpace()) diff --git a/clang/test/Analysis/buffer-overlap-decls.c b/clang/test/Analysis/buffer-overlap-decls.c new file mode 100644 index 0000000..4830f4e --- /dev/null +++ b/clang/test/Analysis/buffer-overlap-decls.c @@ -0,0 +1,23 @@ +// RUN: %clang_analyze_cc1 -verify %s -Wno-incompatible-library-redeclaration \ +// RUN: -analyzer-checker=alpha.unix.cstring.BufferOverlap +// expected-no-diagnostics + +typedef typeof(sizeof(int)) size_t; + +void memcpy(int dst, int src, size_t size); + +void test_memcpy_proxy() { + memcpy(42, 42, 42); // no-crash +} + +void strcpy(int dst, char *src); + +void test_strcpy_proxy() { + strcpy(42, (char *)42); // no-crash +} + +void strxfrm(int dst, char *src, size_t size); + +void test_strxfrm_proxy() { + strxfrm(42, (char *)42, 42); // no-crash +} diff --git a/clang/test/Analysis/buffer-overlap.c b/clang/test/Analysis/buffer-overlap.c index 8414a76..defb17a 100644 --- a/clang/test/Analysis/buffer-overlap.c +++ b/clang/test/Analysis/buffer-overlap.c @@ -96,3 +96,10 @@ void test_snprintf6() { char b[4] = {0}; snprintf(a, sizeof(a), "%s", b); // no-warning } + +void* memcpy(void* dest, const void* src, size_t count); + +void test_memcpy_esoteric() { +label: + memcpy((char *)&&label, (const char *)memcpy, 1); +} diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index e901631..4c396d3 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() { // OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float // OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half // OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2 + +void real_on_scalar_from_imag_with_type_promotion() { + _Float16 _Complex a; + _Float16 b = __real__(__imag__ a); +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16> +// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16 +// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16 +// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float +// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float +// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float> +// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float +// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16 +// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16> + +// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2 +// LLVM: %[[B_ADDR]] = alloca half, i64 1, align 2 +// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2 +// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0 +// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1 +// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float +// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float +// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } 
{{.*}}, float %[[A_REAL_F32]], 0 +// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1 +// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half +// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2 + +// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2 +// OGCG: %[[B_ADDR:.*]] = alloca half, align 2 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1 +// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2 +// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float +// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half +// OGCG: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2 diff --git a/clang/test/CIR/CodeGen/delete.cpp b/clang/test/CIR/CodeGen/delete.cpp new file mode 100644 index 0000000..f21d203 --- /dev/null +++ b/clang/test/CIR/CodeGen/delete.cpp @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fclangir -mconstructor-aliases -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -mconstructor-aliases -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +typedef __typeof(sizeof(int)) size_t; + +struct SizedDelete { + void operator delete(void*, size_t); + int member; +}; +void test_sized_delete(SizedDelete *x) { + delete x; +} + +// SizedDelete::operator delete(void*, unsigned long) +// CIR: cir.func private @_ZN11SizedDeletedlEPvm(!cir.ptr<!void>, !u64i) +// LLVM: declare void @_ZN11SizedDeletedlEPvm(ptr, i64) + +// CIR: cir.func dso_local @_Z17test_sized_deleteP11SizedDelete +// CIR: %[[X:.*]] = cir.load{{.*}} %{{.*}} +// CIR: %[[X_CAST:.*]] = cir.cast(bitcast, %[[X]] : !cir.ptr<!rec_SizedDelete>), !cir.ptr<!void> +// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<4> : !u64i +// CIR: cir.call @_ZN11SizedDeletedlEPvm(%[[X_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> () + +// LLVM: define dso_local void @_Z17test_sized_deleteP11SizedDelete +// LLVM: %[[X:.*]] = load ptr, ptr %{{.*}} +// LLVM: call void @_ZN11SizedDeletedlEPvm(ptr %[[X]], i64 4) + +// OGCG: define dso_local void @_Z17test_sized_deleteP11SizedDelete +// OGCG: %[[X:.*]] = load ptr, ptr %{{.*}} +// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[X]], null +// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]] +// OGCG: [[DELETE_NOTNULL]]: +// OGCG: call void @_ZN11SizedDeletedlEPvm(ptr noundef %[[X]], i64 noundef 4) + +// This function is declared below the call in OGCG. 
+// OGCG: declare void @_ZN11SizedDeletedlEPvm(ptr noundef, i64 noundef) + +struct Contents { + ~Contents() {} +}; +struct Container { + Contents *contents; + ~Container(); +}; +Container::~Container() { delete contents; } + +// Contents::~Contents() +// CIR: cir.func comdat linkonce_odr @_ZN8ContentsD2Ev +// LLVM: define linkonce_odr void @_ZN8ContentsD2Ev + +// operator delete(void*, unsigned long) +// CIR: cir.func private @_ZdlPvm(!cir.ptr<!void>, !u64i) +// LLVM: declare void @_ZdlPvm(ptr, i64) + +// Container::~Container() +// CIR: cir.func dso_local @_ZN9ContainerD2Ev +// CIR: %[[THIS:.*]] = cir.load %{{.*}} +// CIR: %[[CONTENTS_PTR_ADDR:.*]] = cir.get_member %[[THIS]][0] {name = "contents"} : !cir.ptr<!rec_Container> -> !cir.ptr<!cir.ptr<!rec_Contents>> +// CIR: %[[CONTENTS_PTR:.*]] = cir.load{{.*}} %[[CONTENTS_PTR_ADDR]] +// CIR: cir.call @_ZN8ContentsD2Ev(%[[CONTENTS_PTR]]) nothrow : (!cir.ptr<!rec_Contents>) -> () +// CIR: %[[CONTENTS_CAST:.*]] = cir.cast(bitcast, %[[CONTENTS_PTR]] : !cir.ptr<!rec_Contents>), !cir.ptr<!void> +// CIR: %[[OBJ_SIZE:.*]] = cir.const #cir.int<1> : !u64i +// CIR: cir.call @_ZdlPvm(%[[CONTENTS_CAST]], %[[OBJ_SIZE]]) nothrow : (!cir.ptr<!void>, !u64i) -> () + +// LLVM: define dso_local void @_ZN9ContainerD2Ev +// LLVM: %[[THIS:.*]] = load ptr, ptr %{{.*}} +// LLVM: %[[CONTENTS_PTR_ADDR:.*]] = getelementptr %struct.Container, ptr %[[THIS]], i32 0, i32 0 +// LLVM: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS_PTR_ADDR]] +// LLVM: call void @_ZN8ContentsD2Ev(ptr %[[CONTENTS_PTR]]) +// LLVM: call void @_ZdlPvm(ptr %[[CONTENTS_PTR]], i64 1) + +// OGCG: define dso_local void @_ZN9ContainerD2Ev +// OGCG: %[[THIS:.*]] = load ptr, ptr %{{.*}} +// OGCG: %[[CONTENTS:.*]] = getelementptr inbounds nuw %struct.Container, ptr %[[THIS]], i32 0, i32 0 +// OGCG: %[[CONTENTS_PTR:.*]] = load ptr, ptr %[[CONTENTS]] +// OGCG: %[[ISNULL:.*]] = icmp eq ptr %[[CONTENTS_PTR]], null +// OGCG: br i1 %[[ISNULL]], label %{{.*}}, label %[[DELETE_NOTNULL:.*]] +// OGCG: [[DELETE_NOTNULL]]: +// OGCG: call void @_ZN8ContentsD2Ev(ptr noundef nonnull align 1 dereferenceable(1) %[[CONTENTS_PTR]]) +// OGCG: call void @_ZdlPvm(ptr noundef %[[CONTENTS_PTR]], i64 noundef 1) + +// These functions are declared/defined below the calls in OGCG. 
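The i64 1 size argument in the Container::~Container() checks follows from Contents being empty apart from its destructor: C++ gives every complete object a nonzero size, so sizeof(Contents) is 1, and under -std=c++20 the global sized deallocation function _ZdlPvm receives that constant. A small self-contained sketch of the same shape (hypothetical names, not part of the test):

struct EmptyLike {            // hypothetical stand-in for Contents
  ~EmptyLike() {}
};
static_assert(sizeof(EmptyLike) == 1, "an empty class still occupies one byte");

void dispose(EmptyLike *p) {
  delete p; // runs ~EmptyLike(), then ::operator delete(p, sizeof(EmptyLike))
}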
+// OGCG: define linkonce_odr void @_ZN8ContentsD2Ev +// OGCG: declare void @_ZdlPvm(ptr noundef, i64 noundef) diff --git a/clang/test/CIR/CodeGen/lang-c-cpp.cpp b/clang/test/CIR/CodeGen/lang-c-cpp.cpp index e126932..8931783 100644 --- a/clang/test/CIR/CodeGen/lang-c-cpp.cpp +++ b/clang/test/CIR/CodeGen/lang-c-cpp.cpp @@ -3,8 +3,8 @@ // RUN: %clang_cc1 -x c -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.c.cir // RUN: FileCheck --check-prefix=CIR-C --input-file=%t.c.cir %s -// CIR-CPP: module attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}} -// CIR-C: module attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}} +// CIR-CPP: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<cxx>{{.*}}} +// CIR-C: module{{.*}} attributes {{{.*}}cir.lang = #cir.lang<c>{{.*}}} int main() { return 0; diff --git a/clang/test/CIR/CodeGen/module-filename.cpp b/clang/test/CIR/CodeGen/module-filename.cpp new file mode 100644 index 0000000..05e2e92 --- /dev/null +++ b/clang/test/CIR/CodeGen/module-filename.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// Normally, we try to avoid checking the filename of a test, but that's the +// entire point of this test, so we use a wildcard for the path but check the +// filename. +// CIR: module @"{{.*}}module-filename.cpp" + +int main() { + return 0; +} diff --git a/clang/test/CIR/CodeGen/opt-info-attr.cpp b/clang/test/CIR/CodeGen/opt-info-attr.cpp index 444286b..97071d7 100644 --- a/clang/test/CIR/CodeGen/opt-info-attr.cpp +++ b/clang/test/CIR/CodeGen/opt-info-attr.cpp @@ -13,10 +13,10 @@ void f() {} -// CHECK-O0: module attributes +// CHECK-O0: module{{.*}} attributes // CHECK-O0-NOT: cir.opt_info -// CHECK-O1: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}} -// CHECK-O2: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}} -// CHECK-O3: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}} -// CHECK-Os: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}} -// CHECK-Oz: module attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}} +// CHECK-O1: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 1, size = 0>{{.+}} +// CHECK-O2: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 0>{{.+}} +// CHECK-O3: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 3, size = 0>{{.+}} +// CHECK-Os: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 1>{{.+}} +// CHECK-Oz: module{{.*}} attributes {{.+}}cir.opt_info = #cir.opt_info<level = 2, size = 2>{{.+}} diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp index 9139651..4d57f8e 100644 --- a/clang/test/CIR/CodeGen/vbase.cpp +++ b/clang/test/CIR/CodeGen/vbase.cpp @@ -13,19 +13,29 @@ public: class Derived : public virtual Base {}; -// This is just here to force the record types to be emitted. 
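The updated f() and the new g() below contrast the two lowering strategies checked later in this file: with a plain virtual base, the offset of the Base subobject can vary in further-derived classes, so it must be loaded from the vtable at run time; marking the class final fixes the dynamic type and lets the compiler use a constant adjustment instead. A hedged standalone sketch of the same pattern (hypothetical names, not part of the test):

class BaseSketch {                      // hypothetical mirror of Base
public:
  virtual void f() {}
};
class DerivedSketch : public virtual BaseSketch {};            // offset read from vtable
class DerivedFinalSketch final : public virtual BaseSketch {}; // offset known statically

void call_both(DerivedSketch &d, DerivedFinalSketch &df) {
  d.f();  // this-adjustment uses the virtual-base offset stored near the vtable
  df.f(); // 'final' permits a direct, compile-time base adjustment
}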
void f() { Derived d; + d.f(); +} + +class DerivedFinal final : public virtual Base {}; + +void g() { + DerivedFinal df; + df.f(); } // CIR: !rec_Base = !cir.record<class "Base" {!cir.vptr}> // CIR: !rec_Derived = !cir.record<class "Derived" {!rec_Base}> +// CIR: !rec_DerivedFinal = !cir.record<class "DerivedFinal" {!rec_Base}> // LLVM: %class.Derived = type { %class.Base } // LLVM: %class.Base = type { ptr } +// LLVM: %class.DerivedFinal = type { %class.Base } // OGCG: %class.Derived = type { %class.Base } // OGCG: %class.Base = type { ptr } +// OGCG: %class.DerivedFinal = type { %class.Base } // Test the constructor handling for a class with a virtual base. struct A { @@ -47,6 +57,76 @@ void ppp() { B b; } // OGCG: @_ZTV1B = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }, comdat, align 8 +// CIR: cir.func {{.*}}@_Z1fv() { +// CIR: %[[D:.+]] = cir.alloca !rec_Derived, !cir.ptr<!rec_Derived>, ["d", init] +// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> () +// CIR: %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr> +// CIR: %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i> +// CIR: %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i +// CIR: %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i> +// CIR: %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i +// CIR: %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived> +// CIR: %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN:.+]] = cir.load {{.*}} %[[SLOT_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN]](%[[BASE_THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// CIR: cir.func {{.*}}@_Z1gv() { +// CIR: %[[DF:.+]] = cir.alloca !rec_DerivedFinal, !cir.ptr<!rec_DerivedFinal>, ["df", init] +// CIR: cir.call @_ZN12DerivedFinalC1Ev(%[[DF]]) nothrow : (!cir.ptr<!rec_DerivedFinal>) -> () +// CIR: %[[BASE_THIS_2:.+]] = cir.base_class_addr %[[DF]] : !cir.ptr<!rec_DerivedFinal> nonnull [0] -> !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR_2:.+]] = cir.vtable.get_vptr %[[BASE_THIS_2]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR_2:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR_2]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR_2:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR_2]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN_2:.+]] = cir.load {{.*}} %[[SLOT_PTR_2]] : 
!cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN_2]](%[[BASE_THIS_2]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// LLVM: define {{.*}}void @_Z1fv() +// LLVM: %[[D:.+]] = alloca {{.*}} +// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]]) +// LLVM: %[[VPTR_ADDR:.+]] = load ptr, ptr %[[D]] +// LLVM: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VPTR_ADDR]], i64 -32 +// LLVM: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// LLVM: %[[ADJ_THIS:.+]] = getelementptr i8, ptr %[[D]], i64 %[[OFF]] +// LLVM: %[[VFN_TAB:.+]] = load ptr, ptr %[[ADJ_THIS]] +// LLVM: %[[SLOT0:.+]] = getelementptr inbounds ptr, ptr %[[VFN_TAB]], i32 0 +// LLVM: %[[VFN:.+]] = load ptr, ptr %[[SLOT0]] +// LLVM: call void %[[VFN]](ptr %[[ADJ_THIS]]) +// LLVM: ret void + +// LLVM: define {{.*}}void @_Z1gv() +// LLVM: %[[DF:.+]] = alloca {{.*}} +// LLVM: call void @_ZN12DerivedFinalC1Ev(ptr %[[DF]]) +// LLVM: %[[VPTR2:.+]] = load ptr, ptr %[[DF]] +// LLVM: %[[SLOT0_2:.+]] = getelementptr inbounds ptr, ptr %[[VPTR2]], i32 0 +// LLVM: %[[VFN2:.+]] = load ptr, ptr %[[SLOT0_2]] +// LLVM: call void %[[VFN2]](ptr %[[DF]]) +// LLVM: ret void + +// OGCG: define {{.*}}void @_Z1fv() +// OGCG: %[[D:.+]] = alloca {{.*}} +// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]]) +// OGCG: %[[VTABLE:.+]] = load ptr, ptr %[[D]] +// OGCG: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VTABLE]], i64 -32 +// OGCG: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// OGCG: %[[ADJ_THIS:.+]] = getelementptr inbounds i8, ptr %[[D]], i64 %[[OFF]] +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[ADJ_THIS]]) +// OGCG: ret void + +// OGCG: define {{.*}}void @_Z1gv() +// OGCG: %[[DF:.+]] = alloca {{.*}} +// OGCG: call void @_ZN12DerivedFinalC1Ev(ptr {{.*}} %[[DF]]) +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[DF]]) +// OGCG: ret void + // Constructor for B // CIR: cir.func comdat linkonce_odr @_ZN1BC1Ev(%arg0: !cir.ptr<!rec_B> // CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index 8b5379a..8bca48d 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -1322,3 +1322,23 @@ void logical_not() { // OGCG: %[[RESULT:.*]] = icmp eq <4 x i32> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], 
align 16 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index d8fdeea..f242779 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -1390,3 +1390,23 @@ void logical_not_float() { // OGCG: %[[RESULT:.*]] = fcmp oeq <4 x float> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir new file mode 100644 index 0000000..727c067 --- /dev/null +++ b/clang/test/CIR/IR/global-init.cir @@ -0,0 +1,48 @@ +// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s + +!u8i = !cir.int<u, 8> + +!rec_NeedsCtor = !cir.record<struct "NeedsCtor" padded {!u8i}> +!rec_NeedsDtor = !cir.record<struct "NeedsDtor" padded {!u8i}> +!rec_NeedsCtorDtor = !cir.record<struct "NeedsCtorDtor" padded {!u8i}> + +module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { + cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>) + cir.global external @needsCtor = ctor : !rec_NeedsCtor { + %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + } + // CHECK: cir.global external @needsCtor = ctor : !rec_NeedsCtor { + // CHECK: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + // CHECK: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + // CHECK: } + + cir.func private @_ZN9NeedsDtorD1Ev(!cir.ptr<!rec_NeedsDtor>) + cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + } + // CHECK: cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + // CHECK: %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + // CHECK: cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + // CHECK: } + + cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + } dtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + 
} + // CHECK: cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } dtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } +} diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 347cd9e..3018bb97 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -985,18 +985,21 @@ double test_mm256_cvtsd_f64(__m256d __a) { // CHECK: extractelement <4 x double> %{{.*}}, i32 0 return _mm256_cvtsd_f64(__a); } +TEST_CONSTEXPR(_mm256_cvtsd_f64((__m256d){8.0, 7.0, 6.0, 5.0}) == 8.0); int test_mm256_cvtsi256_si32(__m256i __a) { // CHECK-LABEL: test_mm256_cvtsi256_si32 // CHECK: extractelement <8 x i32> %{{.*}}, i32 0 return _mm256_cvtsi256_si32(__a); } +TEST_CONSTEXPR(_mm256_cvtsi256_si32((__m256i)(__v8si){8, 7, 6, 5, 4, 3, 2, 1}) == 8); float test_mm256_cvtss_f32(__m256 __a) { // CHECK-LABEL: test_mm256_cvtss_f32 // CHECK: extractelement <8 x float> %{{.*}}, i32 0 return _mm256_cvtss_f32(__a); } +TEST_CONSTEXPR(_mm256_cvtss_f32((__m256){8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}) == 8.0f); __m128i test_mm256_cvttpd_epi32(__m256d A) { // CHECK-LABEL: test_mm256_cvttpd_epi32 diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c index ded40ca..d0ae0c7 100644 --- a/clang/test/CodeGen/X86/bmi-builtins.c +++ b/clang/test/CodeGen/X86/bmi-builtins.c @@ -1,7 +1,16 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT -// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT -// RUN: %clang_cc1 -x c++ -std=c++11 -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefix=TZCNT +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall 
-Werror | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT | FileCheck %s --check-prefixes=TZCNT,TZCNT64 + +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64,TZCNT,TZCNT64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,TZCNT +// RUN: %clang_cc1 -x c++ -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 -ffreestanding %s -triple=x86_64-windows-msvc -emit-llvm -o - -Wall -Werror -DTEST_TZCNT -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=TZCNT,TZCNT64 #include <immintrin.h> @@ -48,20 +57,20 @@ unsigned int test_tzcnt_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test__tzcnt_u64(unsigned long long __X) { -// TZCNT-LABEL: test__tzcnt_u64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test__tzcnt_u64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return __tzcnt_u64(__X); } long long test_mm_tzcnt_64(unsigned long long __X) { -// TZCNT-LABEL: test_mm_tzcnt_64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test_mm_tzcnt_64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return _mm_tzcnt_64(__X); } unsigned long long test_tzcnt_u64(unsigned long long __X) { -// TZCNT-LABEL: test_tzcnt_u64 -// TZCNT: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) +// TZCNT64-LABEL: test_tzcnt_u64 +// TZCNT64: i64 @llvm.cttz.i64(i64 %{{.*}}, i1 false) return _tzcnt_u64(__X); } #endif @@ -103,36 +112,36 @@ unsigned int test__blsr_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test__andn_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test__andn_u64 -// CHECK: xor i64 %{{.*}}, -1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__andn_u64 +// X64: xor i64 %{{.*}}, -1 +// X64: and i64 %{{.*}}, %{{.*}} return __andn_u64(__X, __Y); } unsigned long long test__bextr_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test__bextr_u64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test__bextr_u64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return __bextr_u64(__X, __Y); } unsigned long long test__blsi_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsi_u64 -// CHECK: sub i64 0, %{{.*}} -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsi_u64 +// X64: sub i64 0, %{{.*}} +// X64: and i64 %{{.*}}, %{{.*}} return __blsi_u64(__X); } unsigned long long 
test__blsmsk_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsmsk_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: xor i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsmsk_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: xor i64 %{{.*}}, %{{.*}} return __blsmsk_u64(__X); } unsigned long long test__blsr_u64(unsigned long long __X) { -// CHECK-LABEL: test__blsr_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test__blsr_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: and i64 %{{.*}}, %{{.*}} return __blsr_u64(__X); } #endif @@ -186,49 +195,49 @@ unsigned int test_blsr_u32(unsigned int __X) { #ifdef __x86_64__ unsigned long long test_andn_u64(unsigned long __X, unsigned long __Y) { -// CHECK-LABEL: test_andn_u64 -// CHECK: xor i64 %{{.*}}, -1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_andn_u64 +// X64: xor i64 %{{.*}}, -1 +// X64: and i64 %{{.*}}, %{{.*}} return _andn_u64(__X, __Y); } unsigned long long test_bextr_u64(unsigned long __X, unsigned int __Y, unsigned int __Z) { -// CHECK-LABEL: test_bextr_u64 -// CHECK: and i32 %{{.*}}, 255 -// CHECK: and i32 %{{.*}}, 255 -// CHECK: shl i32 %{{.*}}, 8 -// CHECK: or i32 %{{.*}}, %{{.*}} -// CHECK: zext i32 %{{.*}} to i64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test_bextr_u64 +// X64: and i32 %{{.*}}, 255 +// X64: and i32 %{{.*}}, 255 +// X64: shl i32 %{{.*}}, 8 +// X64: or i32 %{{.*}}, %{{.*}} +// X64: zext i32 %{{.*}} to i64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return _bextr_u64(__X, __Y, __Z); } unsigned long long test_bextr2_u64(unsigned long long __X, unsigned long long __Y) { -// CHECK-LABEL: test_bextr2_u64 -// CHECK: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) +// X64-LABEL: test_bextr2_u64 +// X64: i64 @llvm.x86.bmi.bextr.64(i64 %{{.*}}, i64 %{{.*}}) return _bextr2_u64(__X, __Y); } unsigned long long test_blsi_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsi_u64 -// CHECK: sub i64 0, %{{.*}} -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsi_u64 +// X64: sub i64 0, %{{.*}} +// X64: and i64 %{{.*}}, %{{.*}} return _blsi_u64(__X); } unsigned long long test_blsmsk_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsmsk_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: xor i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsmsk_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: xor i64 %{{.*}}, %{{.*}} return _blsmsk_u64(__X); } unsigned long long test_blsr_u64(unsigned long long __X) { -// CHECK-LABEL: test_blsr_u64 -// CHECK: sub i64 %{{.*}}, 1 -// CHECK: and i64 %{{.*}}, %{{.*}} +// X64-LABEL: test_blsr_u64 +// X64: sub i64 %{{.*}}, 1 +// X64: and i64 %{{.*}}, %{{.*}} return _blsr_u64(__X); } #endif diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c index 48424f5..1b2cb90 100644 --- a/clang/test/CodeGen/X86/bmi2-builtins.c +++ b/clang/test/CodeGen/X86/bmi2-builtins.c @@ -3,6 +3,11 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - | FileCheck %s --check-prefix=B32 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32 +// RUN: 
%clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +bmi2 -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefix=B32 + #include <immintrin.h> diff --git a/clang/test/CodeGen/X86/tbm-builtins.c b/clang/test/CodeGen/X86/tbm-builtins.c index d916627..89746bf 100644 --- a/clang/test/CodeGen/X86/tbm-builtins.c +++ b/clang/test/CodeGen/X86/tbm-builtins.c @@ -1,5 +1,12 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK + +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK,X64 +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-unknown-unknown -target-feature +tbm -emit-llvm -o - -fexperimental-new-constant-interpreter | FileCheck %s --check-prefixes=CHECK #include <x86intrin.h> @@ -13,14 +20,14 @@ unsigned int test__bextri_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__bextri_u64(unsigned long long a) { - // CHECK-LABEL: test__bextri_u64 - // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2) + // X64-LABEL: test__bextri_u64 + // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 2) return __bextri_u64(a, 2); } unsigned long long test__bextri_u64_bigint(unsigned long long a) { - // CHECK-LABEL: test__bextri_u64_bigint - // CHECK: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887) + // X64-LABEL: test__bextri_u64_bigint + // X64: call i64 @llvm.x86.tbm.bextri.u64(i64 %{{.*}}, i64 549755813887) return __bextri_u64(a, 0x7fffffffffLL); } #endif @@ -34,9 +41,9 @@ unsigned int test__blcfill_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcfill_u64(unsigned long long a) { - // CHECK-LABEL: test__blcfill_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK: %{{.*}} = and i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcfill_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64: %{{.*}} = and i64 %{{.*}}, [[TMP]] return __blcfill_u64(a); } #endif @@ -51,10 +58,10 @@ unsigned int test__blci_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blci_u64(unsigned long long a) { - // 
CHECK-LABEL: test__blci_u64 - // CHECK: [[TMP1:%.*]] = add i64 %{{.*}}, 1 - // CHECK: [[TMP2:%.*]] = xor i64 [[TMP1]], -1 - // CHECK: %{{.*}} = or i64 %{{.*}}, [[TMP2]] + // X64-LABEL: test__blci_u64 + // X64: [[TMP1:%.*]] = add i64 %{{.*}}, 1 + // X64: [[TMP2:%.*]] = xor i64 [[TMP1]], -1 + // X64: %{{.*}} = or i64 %{{.*}}, [[TMP2]] return __blci_u64(a); } #endif @@ -69,10 +76,10 @@ unsigned int test__blcic_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcic_u64(unsigned long long a) { - // CHECK-LABEL: test__blcic_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__blcic_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] return __blcic_u64(a); } #endif @@ -86,9 +93,9 @@ unsigned int test__blcmsk_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcmsk_u64(unsigned long long a) { - // CHECK-LABEL: test__blcmsk_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcmsk_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = xor i64 %{{.*}}, [[TMP]] return __blcmsk_u64(a); } #endif @@ -102,9 +109,9 @@ unsigned int test__blcs_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blcs_u64(unsigned long long a) { - // CHECK-LABEL: test__blcs_u64 - // CHECK: [[TMP:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blcs_u64 + // X64: [[TMP:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] return __blcs_u64(a); } #endif @@ -118,9 +125,9 @@ unsigned int test__blsfill_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blsfill_u64(unsigned long long a) { - // CHECK-LABEL: test__blsfill_u64 - // CHECK: [[TMP:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] + // X64-LABEL: test__blsfill_u64 + // X64: [[TMP:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 %{{.*}}, [[TMP]] return __blsfill_u64(a); } #endif @@ -135,10 +142,10 @@ unsigned int test__blsic_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__blsic_u64(unsigned long long a) { - // CHECK-LABEL: test__blsic_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__blsic_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] return __blsic_u64(a); } #endif @@ -153,10 +160,10 @@ unsigned int test__t1mskc_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__t1mskc_u64(unsigned long long a) { - // CHECK-LABEL: test__t1mskc_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = add i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__t1mskc_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = add i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = or i64 [[TMP1]], [[TMP2]] return __t1mskc_u64(a); } #endif @@ -171,10 +178,10 @@ unsigned int test__tzmsk_u32(unsigned int a) { #ifdef __x86_64__ unsigned long long test__tzmsk_u64(unsigned long long a) { - // CHECK-LABEL: test__tzmsk_u64 - // CHECK: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 - // CHECK: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 - // CHECK-NEXT: {{.*}} = and 
i64 [[TMP1]], [[TMP2]] + // X64-LABEL: test__tzmsk_u64 + // X64: [[TMP1:%.*]] = xor i64 %{{.*}}, -1 + // X64: [[TMP2:%.*]] = sub i64 %{{.*}}, 1 + // X64-NEXT: {{.*}} = and i64 [[TMP1]], [[TMP2]] return __tzmsk_u64(a); } #endif diff --git a/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c new file mode 100644 index 0000000..ef68c79 --- /dev/null +++ b/clang/test/CodeGen/amdgpu-image-rsrc-type-debug-info.c @@ -0,0 +1,17 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited | FileCheck %s + +// CHECK-LABEL: define dso_local void @test_locals( +// CHECK-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[IMG:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[IMG_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IMG]] to ptr +// CHECK-NEXT: #dbg_declare(ptr addrspace(5) [[IMG]], [[META11:![0-9]+]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), [[META14:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[IMG_ASCAST]], align 32, !dbg [[DBG15:![0-9]+]] +// CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]] +// +void test_locals(void) { + __amdgpu_texture_t img; + (void)img; +} diff --git a/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp new file mode 100644 index 0000000..0dbd517 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-image-rsrc-typeinfo.cpp @@ -0,0 +1,7 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s +namespace std { class type_info; } +auto &a = typeid(__amdgpu_texture_t); +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK: {{.*}} diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp index 5920ced..137a49b 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj( // CHECK-NEXT: entry: @@ -21,6 +24,43 @@ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr 
addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter64Py( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) 
[[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter32PVj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = 
load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter64PVy( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) 
[[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() { // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // 
CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared64() { __attribute__((shared)) __UINT64_TYPE__ val; @@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global32() { global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); @@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr 
addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global64() { global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); @@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant32() { __UINT32_TYPE__ local_val; @@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca 
i64, align 8, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant64() { __UINT64_TYPE__ local_val; @@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr 
addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr 
addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr 
addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order64() { __attribute__((shared)) __UINT64_TYPE__ val; @@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope32v( +// AMDGCNSPIRV-NEXT: entry: +// 
AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast 
(ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope64() { __attribute__((shared)) __UINT64_TYPE__ val; diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp index 1e977dd..dd1ca45 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp +++ 
b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -12,6 +15,25 @@ // CHECK-NEXT: fence syncscope("agent") acq_rel // CHECK-NEXT: fence syncscope("workgroup") release // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv( +// GCN-SAME: ) #[[ATTR0:[0-9]+]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst +// GCN-NEXT: fence syncscope("agent") acquire +// GCN-NEXT: fence seq_cst +// GCN-NEXT: fence syncscope("agent") acq_rel +// GCN-NEXT: fence syncscope("workgroup") release +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire +// AMDGCNSPIRV-NEXT: fence seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release +// AMDGCNSPIRV-NEXT: ret void // void test_memory_fence_success() { @@ -35,6 +57,25 @@ void test_memory_fence_success() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_localv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_local() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -58,6 +99,25 @@ void test_local() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z11test_globalv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]] +// GCN-NEXT: fence seq_cst, !mmra [[META4]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra 
[[META4]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: ret void // void test_global() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global"); @@ -80,6 +140,25 @@ void test_global() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_imagev( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_image() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -99,13 +178,33 @@ void test_image() { // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_mixedv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_mixedv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// AMDGCNSPIRV-NEXT: ret void // void test_mixed() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global"); __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local"); } -//. // CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} // CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} // CHECK: [[META5]] = !{[[META4]], [[META3]]} //. +// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// GCN: [[META5]] = !{[[META4]], [[META3]]} +//. +// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]} +//. 
diff --git a/clang/test/CodeGenCXX/gh56652.cpp b/clang/test/CodeGenCXX/gh56652.cpp new file mode 100644 index 0000000..06a496e --- /dev/null +++ b/clang/test/CodeGenCXX/gh56652.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s + +namespace GH56652{ + +struct foo {}; + +template <typename T> struct bar { + using type = T; + + template <foo> inline static constexpr auto b = true; +}; + +template <typename T> +concept C = requires(T a) { T::template b<foo{}>; }; + +template <typename T> auto fn(T) { + if constexpr (!C<T>) + return foo{}; + else + return T{}; +} + +auto a = decltype(fn(bar<int>{})){}; + +} + +namespace GH116319 { + +template <int = 0> struct a { +template <class> static constexpr auto b = 2; +template <class> static void c() noexcept(noexcept(b<int>)) {} +}; + +void test() { a<>::c<int>(); } + + +} + +// CHECK: %"struct.GH56652::bar" = type { i8 } +// CHECK: $_ZN8GH1163191aILi0EE1cIiEEvv = comdat any +// CHECK: @_ZN7GH566521aE = global %"struct.GH56652::bar" undef diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index 19ab656..7cd3f14 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -1,13 +1,13 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s typedef unsigned int uint; typedef unsigned long ulong; @@ -50,7 +50,8 @@ void 
test_s_wait_event_export_ready() { } // CHECK-LABEL: @test_global_add_f32 -// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} #if !defined(__SPIRV__) void test_global_add_f32(float *rtn, global float *addr, float x) { #else diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index 5f202ba..6bb20bf 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -1,9 +1,9 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s #pragma OPENCL EXTENSION cl_khr_fp16 : enable @@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1) // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}} #if !defined(__SPIRV__) @@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) { // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src 
seq_cst, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}} @@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) { // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 039d032..ab0b0b9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1231,7 +1231,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu // CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4 res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4 res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent"); // CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4 diff --git a/clang/test/Driver/modules-print-library-module-manifest-path.cpp b/clang/test/Driver/modules-print-library-module-manifest-path.cpp index 7606713..af0f124 100644 --- a/clang/test/Driver/modules-print-library-module-manifest-path.cpp +++ b/clang/test/Driver/modules-print-library-module-manifest-path.cpp @@ -18,6 
+18,14 @@ // RUN: --target=x86_64-linux-gnu 2>&1 \ // RUN: | FileCheck libcxx.cpp +// check that -nostdlib causes no library-provided module manifest to +// be reported, even when libc++.modules.json is present. +// RUN: %clang -print-library-module-manifest-path \ +// RUN: -nostdlib \ +// RUN: -resource-dir=%t/Inputs/usr/lib/x86_64-linux-gnu \ +// RUN: --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck libcxx-no-module-json.cpp + // for macos there is a different directory structure // where the library and libc++.modules.json file are in lib // directly but headers are in clang/ver directory which diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp index 83632db..cb4bcc9 100644 --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -27,7 +27,6 @@ struct S { ~S() {} }; - template <typename T, int length> T tmain() { T t; @@ -60,6 +59,15 @@ T tmain() { } extern S<float> **foo(); +int g_arr[10]; + +void reductionArrayElement() { +#pragma omp parallel +#pragma omp for reduction(+:g_arr[1]) + for (int i = 0; i < 10; i++) { + g_arr[1] += i; + } +} int main() { #ifdef LAMBDA @@ -164,6 +172,7 @@ int main() { #pragma omp for reduction(& : var3) for (int i = 0; i < 10; ++i) ; + reductionArrayElement(); return tmain<int, 42>(); #endif } @@ -535,6 +544,26 @@ int main() { //. // CHECK4: @.gomp_critical_user_.reduction.var = common global [8 x i32] zeroinitializer, align 8 //. + +// CHECK1-LABEL: define {{.*}}reductionArrayElement{{.*}}.omp_outlined{{.*}} +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1: [[G_ARR:%.*]] = alloca i32, align 4 +// CHECK1: [[TMP0:%.*]] = sdiv exact i64 sub (i64 ptrtoint (ptr @g_arr to i64){{.*}} +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[G_ARR:%.*]], i64 [[TMP0]] +// CHECK1: omp.inner.for.body: +// CHECK1: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]],{{.+}} +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void {{.*}}__kmpc_for_static_fini{{.+}} +// CHECK1: {{.*}}call i32 {{.*}}__kmpc_reduce{{.+}} +// CHECK1: omp.reduction.default: +// CHECK1-NEXT: call void @__kmpc_barrier{{.+}} +// CHECK1-NEXT: ret void +// + // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: @@ -614,6 +643,7 @@ int main() { // CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.11, ptr [[TMP7]]) // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR3]], align 8 // CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @main.omp_outlined.12, ptr [[TMP8]])
+// CHECK1-NEXT: call void {{.*}}reductionArrayElement{{.*}}
 // CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v()
 // CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4
 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
index 7ffb7aae..8c7a778 100644
--- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
+++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
@@ -1,9 +1,7 @@
-// RUN: %clang_cc1 -std=c++20 %s -verify=cxx20
-// RUN: %clang_cc1 -std=c++23 %s -verify=cxx23
-// RUN: %clang_cc1 -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
-// RUN: %clang_cc1 -std=c++23 -pedantic %s -verify=cxx23
-
-//cxx23-no-diagnostics
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++20 %s -verify=cxx20
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 %s -verify=cxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -pedantic %s -verify=cxx23
 
 auto L1 = [] constexpr {};
 // cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
@@ -14,3 +12,25 @@ auto L3 = [] static {};
 // cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
 // cxx20-warning@-2 {{static lambdas are a C++23 extension}}
 // precxx23-warning@-3 {{static lambdas are incompatible with C++ standards before C++23}}
+
+namespace GH161070 {
+void t1() { int a = [] __arm_streaming; }
+// precxx23-error@-1 {{'__arm_streaming' cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{'__arm_streaming' in this position is a C++23 extension}}
+// precxx23-warning@-8 {{'__arm_streaming' in this position is incompatible with C++ standards before C++23}}
+
+void t2() { int a = [] [[assume(true)]]; }
+// precxx23-error@-1 {{'assume' attribute cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'assume' attribute cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'assume' attribute cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{an attribute specifier sequence in this position is a C++23 extension}}
+// precxx23-warning@-8 {{an attribute specifier sequence in this position is incompatible with C++ standards before C++23}}
+}
diff --git a/clang/test/SemaCXX/amdgpu-image-rsrc.cpp b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..61a82d4
--- /dev/null
+++ b/clang/test/SemaCXX/amdgpu-image-rsrc.cpp
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+  int n = 100;
+  __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+  static_cast<__amdgpu_texture_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+  reinterpret_cast<__amdgpu_texture_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_texture_t' is not allowed}}
+  (void)(v + v); // expected-error {{invalid operands to binary expression ('__amdgpu_texture_t' and '__amdgpu_texture_t')}}
+  int x(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_texture_t'}}
+  __amdgpu_texture_t k;
+}
+
+template<class T> void bar(T);
+void use(__amdgpu_texture_t r) { bar(r); }
+struct S { __amdgpu_texture_t r; int a; };
diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp
index 739485b..45a4c4c 100644
--- a/clang/test/SemaCXX/decltype.cpp
+++ b/clang/test/SemaCXX/decltype.cpp
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wno-c99-designator %s
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wno-c99-designator %s
 
 // PR5290
 int const f0();
@@ -156,6 +157,8 @@ struct A {
   }
 };
 
+
+
 // This shouldn't crash.
 static_assert(A<int>().f<int>() == 0, "");
 // The result should not be dependent.
@@ -163,6 +166,81 @@ static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion
 // expected-note@-1 {{expression evaluates to '0 != 0'}}
 }
 
+
+#if __cplusplus >= 201703L
+namespace GH160497 {
+
+template <class> struct S {
+  template <class>
+  inline static auto mem =
+      [] { static_assert(false); // expected-error {{static assertion failed}} \
+           // expected-note {{while substituting into a lambda expression here}}
+        return 42;
+      }();
+};
+
+using T = decltype(S<void>::mem<void>);
+  // expected-note@-1 {{in instantiation of static data member 'GH160497::S<void>::mem<void>' requested here}}
+
+
+template <class> struct S2 {
+  template <class>
+  inline static auto* mem =
+      [] { static_assert(false); // expected-error {{static assertion failed}} \
+           // expected-note {{while substituting into a lambda expression here}}
+        return static_cast<int*>(nullptr);
+      }();
+};
+
+using T2 = decltype(S2<void>::mem<void>);
+//expected-note@-1 {{in instantiation of static data member 'GH160497::S2<void>::mem<void>' requested here}}
+
+template <class> struct S3 {
+  template <class>
+  inline static int mem = // Check we don't instantiate when the type is not deduced.
+      [] { static_assert(false);
+        return 42;
+      }();
+};
+
+using T = decltype(S3<void>::mem<void>);
+}
+
+namespace N1 {
+
+template<class>
+struct S {
+  template<class>
+  inline static auto mem = 42;
+};
+
+using T = decltype(S<void>::mem<void>);
+
+T y = 42;
+
+}
+
+namespace GH161196 {
+
+template <typename> struct A {
+  static constexpr int digits = 0;
+};
+
+template <typename> struct B {
+  template <int, typename MaskInt = int, int = A<MaskInt>::digits>
+  static constexpr auto XBitMask = 0;
+};
+
+struct C {
+  using ReferenceHost = B<int>;
+  template <int> static decltype(ReferenceHost::XBitMask<0>) XBitMask;
+};
+
+void test() { (void)C::XBitMask<0>; }
+
+}
+#endif
+
 template<typename> class conditional {
 };
diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp
index 3f01247..d49330f 100644
--- a/clang/test/SemaCXX/type-traits.cpp
+++ b/clang/test/SemaCXX/type-traits.cpp
@@ -2038,6 +2038,49 @@ void is_implicit_lifetime(int n) {
   static_assert(__builtin_is_implicit_lifetime(int * __restrict));
 }
 
+namespace GH160610 {
+class NonAggregate {
+public:
+  NonAggregate() = default;
+
+  NonAggregate(const NonAggregate&) = delete;
+  NonAggregate& operator=(const NonAggregate&) = delete;
+private:
+  int num;
+};
+
+class DataMemberInitializer {
+public:
+  DataMemberInitializer() = default;
+
+  DataMemberInitializer(const DataMemberInitializer&) = delete;
+  DataMemberInitializer& operator=(const DataMemberInitializer&) = delete;
+private:
+  int num = 0;
+};
+
+class UserProvidedConstructor {
+public:
+  UserProvidedConstructor() {}
+
+  UserProvidedConstructor(const UserProvidedConstructor&) = delete;
+  UserProvidedConstructor& operator=(const UserProvidedConstructor&) = delete;
+};
+
+static_assert(__builtin_is_implicit_lifetime(NonAggregate));
+static_assert(!__builtin_is_implicit_lifetime(DataMemberInitializer));
+static_assert(!__builtin_is_implicit_lifetime(UserProvidedConstructor));
+
+#if __cplusplus >= 202002L
+template <typename T>
+class Tpl {
+  Tpl() requires false = default ;
+};
+static_assert(!__builtin_is_implicit_lifetime(Tpl<int>));
+
+#endif
+}
+
 void is_signed()
 {
   //static_assert(__is_signed(char));
diff --git a/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
new file mode 100644
index 0000000..dc56494
--- /dev/null
+++ b/clang/test/SemaOpenCL/amdgpu-image-rsrc.cl
@@ -0,0 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa %s
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa %s
+
+void f() {
+  int n = 3;
+  __amdgpu_texture_t v = (__amdgpu_texture_t)0; // expected-error {{used type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+  int k = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_texture_t'}}
+  (void)(v + v); // expected-error {{invalid operands}}
+  __amdgpu_texture_t r;
+  int *p = (int*)r; // expected-error {{operand of type '__amdgpu_texture_t' where arithmetic or pointer type is required}}
+}
diff --git a/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
new file mode 100644
index 0000000..51b3f72
--- /dev/null
+++ b/clang/test/SemaOpenMP/amdgpu-image-rsrc.cpp
@@ -0,0 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s
+
+void foo() {
+#pragma omp target
+  {
+    int n = 5;
+    __amdgpu_texture_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_texture_t' with an rvalue of type 'int'}}
+    (void)(v + v); // expected-error {{invalid operands to binary expression}}
+  }
+}
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0e2758d..e41f4eb 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -420,7 +420,7 @@ public:
     std::vector<ModuleDeps *> NewMDs;
     {
       std::unique_lock<std::mutex> ul(Lock);
-      for (const ModuleDeps &MD : Graph) {
+      for (ModuleDeps &MD : Graph) {
         auto I = Modules.find({MD.ID, 0});
         if (I != Modules.end()) {
           I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
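
Background for the type-traits.cpp hunk above, as a standalone illustration rather than part of the patch: under the C++23 rule that __builtin_is_implicit_lifetime implements, a non-aggregate class is implicit-lifetime only if it still has at least one trivial eligible constructor and a trivial, non-deleted destructor. The sketch below (hypothetical type names, assuming a clang build that includes the GH160610 fix) shows the distinction the new tests pin down; before the fix, clang wrongly reported the user-provided case as implicit-lifetime.

// Sketch only, not part of the patch. Check with: clang++ -std=c++20 -fsyntax-only sketch.cpp
struct Defaulted {
  Defaulted() = default; // defaulted, still trivial: the type stays implicit-lifetime
};

struct UserProvided {
  UserProvided() {} // user-provided body: the default constructor is no longer trivial
  UserProvided(const UserProvided&) = delete;            // deleted, so not an eligible trivial
  UserProvided& operator=(const UserProvided&) = delete; // constructor; move ctor is suppressed
};

static_assert(__builtin_is_implicit_lifetime(Defaulted));
static_assert(!__builtin_is_implicit_lifetime(UserProvided)); // held true before the fix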