113 files changed, 4581 insertions, 825 deletions
diff --git a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp index affa276..5770bf7 100644 --- a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp +++ b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp @@ -164,6 +164,22 @@ getNewFieldsOrder(const RecordDecl *Definition, return NewFieldsOrder; } +static bool isOrderValid(const RecordDecl *RD, ArrayRef<unsigned> FieldOrder) { + if (FieldOrder.empty()) + return false; + + // If there is a flexible array member in the struct, it must remain the last + // field. + if (RD->hasFlexibleArrayMember() && + FieldOrder.back() != FieldOrder.size() - 1) { + llvm::errs() + << "Flexible array member must remain the last field in the struct\n"; + return false; + } + + return true; +} + struct ReorderedStruct { public: ReorderedStruct(const RecordDecl *Decl, ArrayRef<unsigned> NewFieldsOrder) @@ -662,7 +678,7 @@ public: return; SmallVector<unsigned, 4> NewFieldsOrder = getNewFieldsOrder(RD, DesiredFieldsOrder); - if (NewFieldsOrder.empty()) + if (!isOrderValid(RD, NewFieldsOrder)) return; ReorderedStruct RS{RD, NewFieldsOrder}; @@ -699,7 +715,7 @@ public: std::unique_ptr<ASTConsumer> ReorderFieldsAction::newASTConsumer() { return std::make_unique<ReorderingConsumer>(RecordName, DesiredFieldsOrder, - Replacements); + Replacements); } } // namespace reorder_fields diff --git a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp index 78e58bc..36ac9a4 100644 --- a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp @@ -19,7 +19,7 @@ ComparisonInTempFailureRetryCheck::ComparisonInTempFailureRetryCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), RawRetryList(Options.get("RetryMacros", "TEMP_FAILURE_RETRY")) { - StringRef(RawRetryList).split(RetryMacros, ",", -1, false); + RawRetryList.split(RetryMacros, ",", -1, false); } void ComparisonInTempFailureRetryCheck::storeOptions( diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp index 227641d..1700502 100644 --- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp @@ -92,7 +92,7 @@ AssertSideEffectCheck::AssertSideEffectCheck(StringRef Name, RawAssertList(Options.get("AssertMacros", "assert,NSAssert,NSCAssert")), IgnoredFunctions(utils::options::parseListPair( "__builtin_expect;", Options.get("IgnoredFunctions", ""))) { - StringRef(RawAssertList).split(AssertMacros, ",", -1, false); + RawAssertList.split(AssertMacros, ",", -1, false); } // The options are explained in AssertSideEffectCheck.h. 
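The clang-tidy changes above (and the ones that follow) share one pattern: the raw option string is now held as a StringRef member, so the constructor can call split() on it directly instead of first wrapping a std::string member in a temporary StringRef. A minimal standalone sketch of that splitting call, with a made-up option value (not code from this patch):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/raw_ostream.h"

  int main() {
    // A comma-separated option value, as a check would receive it.
    llvm::StringRef RawAssertList = "assert,NSAssert,NSCAssert";
    llvm::SmallVector<llvm::StringRef, 4> AssertMacros;
    // MaxSplit = -1 splits on every separator; KeepEmpty = false drops the
    // empty piece a trailing comma would otherwise produce.
    RawAssertList.split(AssertMacros, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
    for (llvm::StringRef Macro : AssertMacros)
      llvm::outs() << Macro << "\n"; // assert, NSAssert, NSCAssert
  }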
diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp index 3d839b5..837a86f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp @@ -39,12 +39,12 @@ ExceptionEscapeCheck::ExceptionEscapeCheck(StringRef Name, RawIgnoredExceptions(Options.get("IgnoredExceptions", "")) { llvm::SmallVector<StringRef, 8> FunctionsThatShouldNotThrowVec, IgnoredExceptionsVec; - StringRef(RawFunctionsThatShouldNotThrow) - .split(FunctionsThatShouldNotThrowVec, ",", -1, false); + RawFunctionsThatShouldNotThrow.split(FunctionsThatShouldNotThrowVec, ",", -1, + false); FunctionsThatShouldNotThrow.insert_range(FunctionsThatShouldNotThrowVec); llvm::StringSet<> IgnoredExceptions; - StringRef(RawIgnoredExceptions).split(IgnoredExceptionsVec, ",", -1, false); + RawIgnoredExceptions.split(IgnoredExceptionsVec, ",", -1, false); IgnoredExceptions.insert_range(IgnoredExceptionsVec); Tracer.ignoreExceptions(std::move(IgnoredExceptions)); Tracer.ignoreBadAlloc(true); diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h index ae6e202..bd1e7ba 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h +++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h @@ -33,8 +33,8 @@ public: void check(const ast_matchers::MatchFinder::MatchResult &Result) override; private: - std::string RawFunctionsThatShouldNotThrow; - std::string RawIgnoredExceptions; + StringRef RawFunctionsThatShouldNotThrow; + StringRef RawIgnoredExceptions; llvm::StringSet<> FunctionsThatShouldNotThrow; utils::ExceptionAnalyzer Tracer; diff --git a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp index aa95fad..b8bca72 100644 --- a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp +++ b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp @@ -20,7 +20,7 @@ ProperlySeededRandomGeneratorCheck::ProperlySeededRandomGeneratorCheck( : ClangTidyCheck(Name, Context), RawDisallowedSeedTypes( Options.get("DisallowedSeedTypes", "time_t,std::time_t")) { - StringRef(RawDisallowedSeedTypes).split(DisallowedSeedTypes, ','); + RawDisallowedSeedTypes.split(DisallowedSeedTypes, ','); } void ProperlySeededRandomGeneratorCheck::storeOptions( diff --git a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h index ea30127..7da01cc 100644 --- a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h +++ b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h @@ -33,7 +33,7 @@ private: void checkSeed(const ast_matchers::MatchFinder::MatchResult &Result, const T *Func); - std::string RawDisallowedSeedTypes; + StringRef RawDisallowedSeedTypes; SmallVector<StringRef, 5> DisallowedSeedTypes; }; diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp index 4084d71..b921819 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp @@ -494,7 +494,7 @@ UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context) NullMacrosStr(Options.get("NullMacros", "NULL")), 
IgnoredTypes(utils::options::parseStringList(Options.get( "IgnoredTypes", "_CmpUnspecifiedParam;^std::__cmp_cat::__unspec"))) { - StringRef(NullMacrosStr).split(NullMacros, ","); + NullMacrosStr.split(NullMacros, ","); } void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { diff --git a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp index f9becee..3801fc0 100644 --- a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp +++ b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp @@ -23,7 +23,7 @@ ExceptionEscapeCheck::ExceptionEscapeCheck(StringRef Name, llvm::SmallVector<StringRef, 8> IgnoredExceptionsVec; llvm::StringSet<> IgnoredExceptions; - StringRef(RawIgnoredExceptions).split(IgnoredExceptionsVec, ",", -1, false); + RawIgnoredExceptions.split(IgnoredExceptionsVec, ",", -1, false); llvm::transform(IgnoredExceptionsVec, IgnoredExceptionsVec.begin(), [](StringRef S) { return S.trim(); }); IgnoredExceptions.insert_range(IgnoredExceptionsVec); diff --git a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h index 39da124..757a933 100644 --- a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h +++ b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h @@ -30,7 +30,7 @@ public: void check(const ast_matchers::MatchFinder::MatchResult &Result) override; private: - std::string RawIgnoredExceptions; + StringRef RawIgnoredExceptions; utils::ExceptionAnalyzer Tracer; }; diff --git a/clang-tools-extra/test/clang-reorder-fields/FlexibleArrayMember.c b/clang-tools-extra/test/clang-reorder-fields/FlexibleArrayMember.c new file mode 100644 index 0000000..ef64350 --- /dev/null +++ b/clang-tools-extra/test/clang-reorder-fields/FlexibleArrayMember.c @@ -0,0 +1,10 @@ +// RUN: clang-reorder-fields -record-name Foo -fields-order z,y,x %s -- 2>&1 | FileCheck --check-prefix=CHECK-BAD %s +// RUN: clang-reorder-fields -record-name Foo -fields-order y,x,z %s -- | FileCheck --check-prefix=CHECK-GOOD %s + +// CHECK-BAD: {{^Flexible array member must remain the last field in the struct}} + +struct Foo { + int x; // CHECK-GOOD: {{^ int y;}} + int y; // CHECK-GOOD-NEXT: {{^ int x;}} + int z[]; // CHECK-GOOD-NEXT: {{^ int z\[\];}} +}; diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e8deae5..79dc0b2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -433,6 +433,8 @@ Bug Fixes to C++ Support object type. (#GH151531) - Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409). - Fix the result of `__builtin_is_implicit_lifetime` for types with a user-provided constructor. (#GH160610) +- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196) +- Fixed a crash in the pre-C++23 warning for attributes before a lambda declarator (#GH161070). 
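The decltype fix called out in the release note above corresponds to the SemaExpr.cpp change further down, where DoMarkVarDeclReferenced now also instantiates the definition of a variable whose type is still undeduced. A small illustration of the mechanism (hand-written for this note, not taken from the patch's tests):

  #include <type_traits>

  template <typename T>
  auto Value = T{};

  // The operand of decltype is unevaluated, so naming Value<int> is not an
  // odr-use; the definition must still be instantiated so that 'auto' can
  // be deduced before the type is queried.
  using N = decltype(Value<int>);
  static_assert(std::is_same_v<N, int>, "deduced through decltype");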
Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 4d9e123..c724136 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1141,7 +1141,7 @@ def warn_cxx23_compat_binding_pack : Warning< def err_capture_default_first : Error< "capture default must be first">; def ext_decl_attrs_on_lambda : ExtWarn< - "%select{an attribute specifier sequence|%0}1 in this position " + "%select{an attribute specifier sequence|%1}0 in this position " "is a C++23 extension">, InGroup<CXX23AttrsOnLambda>; def ext_lambda_missing_parens : ExtWarn< "lambda without a parameter clause is a C++23 extension">, diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index bb39444..e1be08c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -683,8 +683,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [ //===----------------------------------------------------------------------===// defvar CIR_YieldableScopes = [ - "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp", - "SwitchOp", "TernaryOp", "WhileOp" + "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp", + "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp" ]; def CIR_YieldOp : CIR_Op<"yield", [ @@ -1776,7 +1776,9 @@ def CIR_GlobalLinkageKind : CIR_I32EnumAttr< // is upstreamed. def CIR_GlobalOp : CIR_Op<"global", [ - DeclareOpInterfaceMethods<CIRGlobalValueInterface> + DeclareOpInterfaceMethods<RegionBranchOpInterface>, + DeclareOpInterfaceMethods<CIRGlobalValueInterface>, + NoRegionArguments ]> { let summary = "Declare or define a global variable"; let description = [{ @@ -1807,6 +1809,9 @@ def CIR_GlobalOp : CIR_Op<"global", [ UnitAttr:$dso_local, OptionalAttr<I64Attr>:$alignment); + let regions = (region MaxSizedRegion<1>:$ctorRegion, + MaxSizedRegion<1>:$dtorRegion); + let assemblyFormat = [{ ($sym_visibility^)? (`` $global_visibility^)? @@ -1815,24 +1820,34 @@ def CIR_GlobalOp : CIR_Op<"global", [ (`comdat` $comdat^)? (`dso_local` $dso_local^)? $sym_name - custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value) + custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value, + $ctorRegion, $dtorRegion) attr-dict }]; let extraClassDeclaration = [{ - bool isDeclaration() { return !getInitialValue(); } + bool isDeclaration() { + return !getInitialValue() && getCtorRegion().empty() && getDtorRegion().empty(); + } bool hasInitializer() { return !isDeclaration(); } }]; let skipDefaultBuilders = 1; - let builders = [OpBuilder<(ins - "llvm::StringRef":$sym_name, - "mlir::Type":$sym_type, - CArg<"bool", "false">:$isConstant, - // CIR defaults to external linkage. - CArg<"cir::GlobalLinkageKind", - "cir::GlobalLinkageKind::ExternalLinkage">:$linkage)>]; + let builders = [ + OpBuilder<(ins + "llvm::StringRef":$sym_name, + "mlir::Type":$sym_type, + CArg<"bool", "false">:$isConstant, + // CIR defaults to external linkage. 
+ CArg<"cir::GlobalLinkageKind", + "cir::GlobalLinkageKind::ExternalLinkage">:$linkage, + CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>", + "nullptr">:$ctorBuilder, + CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>", + "nullptr">:$dtorBuilder) + > + ]; let hasVerifier = 1; diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index cb8fe6c..9d12a13 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -951,28 +951,37 @@ Address CIRGenFunction::getAddressOfBaseClass( bool nullCheckValue, SourceLocation loc) { assert(!path.empty() && "Base path should not be empty!"); + CastExpr::path_const_iterator start = path.begin(); + const CXXRecordDecl *vBase = nullptr; + if ((*path.begin())->isVirtual()) { - // The implementation here is actually complete, but let's flag this - // as an error until the rest of the virtual base class support is in place. - cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base"); - return Address::invalid(); + vBase = (*start)->getType()->castAsCXXRecordDecl(); + ++start; } // Compute the static offset of the ultimate destination within its // allocating subobject (the virtual base, if there is one, or else // the "complete" object that we see). - CharUnits nonVirtualOffset = - cgm.computeNonVirtualBaseClassOffset(derived, path); + CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset( + vBase ? vBase : derived, {start, path.end()}); + + // If there's a virtual step, we can sometimes "devirtualize" it. + // For now, that's limited to when the derived type is final. + // TODO: "devirtualize" this for accesses to known-complete objects. + if (vBase && derived->hasAttr<FinalAttr>()) { + const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived); + CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase); + nonVirtualOffset += vBaseOffset; + vBase = nullptr; // we no longer have a virtual step + } // Get the base pointer type. mlir::Type baseValueTy = convertType((path.end()[-1])->getType()); assert(!cir::MissingFeatures::addressSpace()); - // The if statement here is redundant now, but it will be needed when we add - // support for virtual base classes. // If there is no virtual base, use cir.base_class_addr. It takes care of // the adjustment and the null pointer check. - if (nonVirtualOffset.isZero()) { + if (nonVirtualOffset.isZero() && !vBase) { assert(!cir::MissingFeatures::sanitizers()); return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0, /*assumeNotNull=*/true); @@ -980,10 +989,17 @@ Address CIRGenFunction::getAddressOfBaseClass( assert(!cir::MissingFeatures::sanitizers()); - // Apply the offset - value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, - nonVirtualOffset.getQuantity(), - /*assumeNotNull=*/true); + // Compute the virtual offset. + mlir::Value virtualOffset = nullptr; + if (vBase) { + virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset( + getLoc(loc), *this, value, derived, vBase); + } + + // Apply both offsets. + value = applyNonVirtualAndVirtualOffset( + getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived, + vBase, baseValueTy, not nullCheckValue); // Cast to the destination type. 
value = value.withElementType(builder, baseValueTy); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index b93d9a9..f4bbced 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -676,6 +676,10 @@ public: mlir::Value VisitRealImag(const UnaryOperator *e, QualType promotionType = QualType()); + mlir::Value VisitUnaryExtension(const UnaryOperator *e) { + return Visit(e->getSubExpr()); + } + mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) { CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die); return Visit(die->getExpr()); @@ -1278,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e, } else if (const auto *uo = dyn_cast<UnaryOperator>(e)) { switch (uo->getOpcode()) { case UO_Imag: - cgf.cgm.errorNYI(e->getSourceRange(), - "ScalarExprEmitter::emitPromoted unary imag"); - return {}; case UO_Real: return VisitRealImag(uo, promotionType); case UO_Minus: diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index e842892..644c383 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s, case Stmt::OMPSimdDirectiveClass: case Stmt::OMPTileDirectiveClass: case Stmt::OMPUnrollDirectiveClass: + case Stmt::OMPFuseDirectiveClass: case Stmt::OMPForDirectiveClass: case Stmt::OMPForSimdDirectiveClass: case Stmt::OMPSectionsDirectiveClass: diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 58ef500..fb87036 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1355,9 +1355,11 @@ mlir::LogicalResult cir::GlobalOp::verify() { return success(); } -void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, - llvm::StringRef sym_name, mlir::Type sym_type, - bool isConstant, cir::GlobalLinkageKind linkage) { +void cir::GlobalOp::build( + OpBuilder &odsBuilder, OperationState &odsState, llvm::StringRef sym_name, + mlir::Type sym_type, bool isConstant, cir::GlobalLinkageKind linkage, + function_ref<void(OpBuilder &, Location)> ctorBuilder, + function_ref<void(OpBuilder &, Location)> dtorBuilder) { odsState.addAttribute(getSymNameAttrName(odsState.name), odsBuilder.getStringAttr(sym_name)); odsState.addAttribute(getSymTypeAttrName(odsState.name), @@ -1370,26 +1372,88 @@ void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState, cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage); odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr); + Region *ctorRegion = odsState.addRegion(); + if (ctorBuilder) { + odsBuilder.createBlock(ctorRegion); + ctorBuilder(odsBuilder, odsState.location); + } + + Region *dtorRegion = odsState.addRegion(); + if (dtorBuilder) { + odsBuilder.createBlock(dtorRegion); + dtorBuilder(odsBuilder, odsState.location); + } + odsState.addAttribute(getGlobalVisibilityAttrName(odsState.name), cir::VisibilityAttr::get(odsBuilder.getContext())); } +/// Given a branch point, return the regions that may be selected next during +/// the flow of control. The `ctor` and `dtor` regions of a global are entered +/// from the parent operation and always branch back to it.
+void cir::GlobalOp::getSuccessorRegions( + mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) { + // The `ctor` and `dtor` regions always branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(RegionSuccessor()); + return; + } + + // Don't consider the ctor region if it is empty. + Region *ctorRegion = &this->getCtorRegion(); + if (ctorRegion->empty()) + ctorRegion = nullptr; + + // Don't consider the dtor region if it is empty. + Region *dtorRegion = &this->getDtorRegion(); + if (dtorRegion->empty()) + dtorRegion = nullptr; + + // Any non-empty region may be executed. + if (ctorRegion) + regions.push_back(RegionSuccessor(ctorRegion)); + if (dtorRegion) + regions.push_back(RegionSuccessor(dtorRegion)); +} + static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op, - TypeAttr type, - Attribute initAttr) { + TypeAttr type, Attribute initAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { + auto printType = [&]() { p << ": " << type; }; if (!op.isDeclaration()) { p << "= "; - // This also prints the type... - if (initAttr) - printConstant(p, initAttr); + if (!ctorRegion.empty()) { + p << "ctor "; + printType(); + p << " "; + p.printRegion(ctorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } else { + // This also prints the type... + if (initAttr) + printConstant(p, initAttr); + } + + if (!dtorRegion.empty()) { + p << " dtor "; + p.printRegion(dtorRegion, + /*printEntryBlockArgs=*/false, + /*printBlockTerminators=*/false); + } } else { - p << ": " << type; + printType(); } } -static ParseResult -parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, - Attribute &initialValueAttr) { +static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, + TypeAttr &typeAttr, + Attribute &initialValueAttr, + mlir::Region &ctorRegion, + mlir::Region &dtorRegion) { mlir::Type opTy; if (parser.parseOptionalEqual().failed()) { // Absence of equal means a declaration, so we need to parse the type. @@ -1397,16 +1461,38 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, if (parser.parseColonType(opTy)) return failure(); } else { - // Parse constant with initializer, examples: - // cir.global @y = #cir.fp<1.250000e+00> : !cir.double - // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>> - if (parseConstantValue(parser, initialValueAttr).failed()) - return failure(); + // Parse constructor, example: + // cir.global @rgb = ctor : type { ... } + if (!parser.parseOptionalKeyword("ctor")) { + if (parser.parseColonType(opTy)) + return failure(); + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed()) + return failure(); + } else { + // Parse constant with initializer, examples: + // cir.global @y = 3.400000e+00 : f32 + // cir.global @rgb = #cir.const_array<[...]
: !cir.array<i8 x 3>> + if (parseConstantValue(parser, initialValueAttr).failed()) + return failure(); + + assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && + "Non-typed attrs shouldn't appear here."); + auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); + opTy = typedAttr.getType(); + } - assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) && - "Non-typed attrs shouldn't appear here."); - auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr); - opTy = typedAttr.getType(); + // Parse destructor, example: + // dtor { ... } + if (!parser.parseOptionalKeyword("dtor")) { + auto parseLoc = parser.getCurrentLocation(); + if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{})) + return failure(); + if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed()) + return failure(); + } } typeAttr = TypeAttr::get(opTy); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 57db20f7..64f1917 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (std::optional<GCOVOptions> Options = getGCOVOptions(CodeGenOpts, LangOpts)) PB.registerPipelineStartEPCallback( - [Options](ModulePassManager &MPM, OptimizationLevel Level) { - MPM.addPass(GCOVProfilerPass(*Options)); + [this, Options](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass( + GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr())); }); if (std::optional<InstrProfOptions> Options = getInstrProfOptions(CodeGenOpts, LangOpts)) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 07cf08c..6596ec0 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF, return CGF.Builder.CreateCall(F, {Src0, Src1}); } +static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) { + if (AMDGCNScope == "agent") + return "device"; + if (AMDGCNScope == "wavefront") + return "subgroup"; + return AMDGCNScope; +} + // For processing memory ordering and memory scope arguments of various // amdgcn builtins. -// \p Order takes a C++11 comptabile memory-ordering specifier and converts +// \p Order takes a C++11 compatible memory-ordering specifier and converts // it into LLVM's memory ordering specifier using atomic C ABI, and writes // to \p AO. \p Scope takes a const char * and converts it into AMDGCN // specific SyncScopeID and writes it to \p SSID. @@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, // Some of the atomic builtins take the scope as a string name. 
StringRef scp; if (llvm::getConstantStringInfo(Scope, scp)) { + if (getTarget().getTriple().isSPIRV()) + scp = mapScopeToSPIRV(scp); SSID = getLLVMContext().getOrInsertSyncScopeID(scp); return; } @@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, SSID = llvm::SyncScope::System; break; case 1: // __MEMORY_SCOPE_DEVICE - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); break; case 2: // __MEMORY_SCOPE_WRKGRP SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup"); break; case 3: // __MEMORY_SCOPE_WVFRNT - SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); break; case 4: // __MEMORY_SCOPE_SINGLE SSID = llvm::SyncScope::SingleThread; @@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, // // The global/flat cases need to use agent scope to consistently produce // the native instruction instead of a cmpxchg expansion. - SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); + if (getTarget().getTriple().isSPIRV()) + SSID = getLLVMContext().getOrInsertSyncScopeID("device"); + else + SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); AO = AtomicOrdering::Monotonic; // The v2bf16 builtin uses i16 instead of a natural bfloat type. diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 8605ba2..a2c6957 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1299,7 +1299,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( Diag(Tok, getLangOpts().CPlusPlus23 ? 
diag::warn_cxx20_compat_decl_attrs_on_lambda : diag::ext_decl_attrs_on_lambda) - << Tok.getIdentifierInfo() << Tok.isRegularKeywordAttribute(); + << Tok.isRegularKeywordAttribute() << Tok.getIdentifierInfo(); MaybeParseCXX11Attributes(D); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3b267c1..3302bfc 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -20108,8 +20108,9 @@ static void DoMarkVarDeclReferenced( bool NeededForConstantEvaluation = isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr; - bool NeedDefinition = - OdrUse == OdrUseContext::Used || NeededForConstantEvaluation; + bool NeedDefinition = OdrUse == OdrUseContext::Used || + NeededForConstantEvaluation || + Var->getType()->isUndeducedType(); assert(!isa<VarTemplatePartialSpecializationDecl>(Var) && "Can't instantiate a partial template specialization."); diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp index e901631..4c396d3 100644 --- a/clang/test/CIR/CodeGen/complex.cpp +++ b/clang/test/CIR/CodeGen/complex.cpp @@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() { // OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float // OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half // OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2 + +void real_on_scalar_from_imag_with_type_promotion() { + _Float16 _Complex a; + _Float16 b = __real__(__imag__ a); +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16> +// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16 +// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16 +// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float +// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float +// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float> +// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float +// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16 +// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16> + +// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2 +// LLVM: %[[B_ADDR:.*]] = alloca half, i64 1, align 2 +// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2 +// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0 +// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1 +// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float +// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float +// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0 +// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1 +// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half +// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2 + +// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2 +// OGCG: %[[B_ADDR:.*]] = alloca half, align 2 +// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32
1 +// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2 +// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float +// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half +// OGCG: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2 diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp index 9139651..4d57f8e 100644 --- a/clang/test/CIR/CodeGen/vbase.cpp +++ b/clang/test/CIR/CodeGen/vbase.cpp @@ -13,19 +13,29 @@ public: class Derived : public virtual Base {}; -// This is just here to force the record types to be emitted. void f() { Derived d; + d.f(); +} + +class DerivedFinal final : public virtual Base {}; + +void g() { + DerivedFinal df; + df.f(); } // CIR: !rec_Base = !cir.record<class "Base" {!cir.vptr}> // CIR: !rec_Derived = !cir.record<class "Derived" {!rec_Base}> +// CIR: !rec_DerivedFinal = !cir.record<class "DerivedFinal" {!rec_Base}> // LLVM: %class.Derived = type { %class.Base } // LLVM: %class.Base = type { ptr } +// LLVM: %class.DerivedFinal = type { %class.Base } // OGCG: %class.Derived = type { %class.Base } // OGCG: %class.Base = type { ptr } +// OGCG: %class.DerivedFinal = type { %class.Base } // Test the constructor handling for a class with a virtual base. struct A { @@ -47,6 +57,76 @@ void ppp() { B b; } // OGCG: @_ZTV1B = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }, comdat, align 8 +// CIR: cir.func {{.*}}@_Z1fv() { +// CIR: %[[D:.+]] = cir.alloca !rec_Derived, !cir.ptr<!rec_Derived>, ["d", init] +// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> () +// CIR: %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr> +// CIR: %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i> +// CIR: %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i +// CIR: %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i> +// CIR: %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i +// CIR: %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i> +// CIR: %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived> +// CIR: %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN:.+]] = cir.load {{.*}} %[[SLOT_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN]](%[[BASE_THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// CIR: cir.func {{.*}}@_Z1gv() { +// CIR: %[[DF:.+]] = cir.alloca !rec_DerivedFinal, !cir.ptr<!rec_DerivedFinal>, ["df", init] +// CIR: cir.call @_ZN12DerivedFinalC1Ev(%[[DF]]) nothrow : 
(!cir.ptr<!rec_DerivedFinal>) -> () +// CIR: %[[BASE_THIS_2:.+]] = cir.base_class_addr %[[DF]] : !cir.ptr<!rec_DerivedFinal> nonnull [0] -> !cir.ptr<!rec_Base> +// CIR: %[[BASE_VPTR_PTR_2:.+]] = cir.vtable.get_vptr %[[BASE_THIS_2]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr> +// CIR: %[[BASE_VPTR_2:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR_2]] : !cir.ptr<!cir.vptr>, !cir.vptr +// CIR: %[[SLOT_PTR_2:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR_2]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>> +// CIR: %[[FN_2:.+]] = cir.load {{.*}} %[[SLOT_PTR_2]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>> +// CIR: cir.call %[[FN_2]](%[[BASE_THIS_2]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> () +// CIR: cir.return + +// LLVM: define {{.*}}void @_Z1fv() +// LLVM: %[[D:.+]] = alloca {{.*}} +// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]]) +// LLVM: %[[VPTR_ADDR:.+]] = load ptr, ptr %[[D]] +// LLVM: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VPTR_ADDR]], i64 -32 +// LLVM: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// LLVM: %[[ADJ_THIS:.+]] = getelementptr i8, ptr %[[D]], i64 %[[OFF]] +// LLVM: %[[VFN_TAB:.+]] = load ptr, ptr %[[ADJ_THIS]] +// LLVM: %[[SLOT0:.+]] = getelementptr inbounds ptr, ptr %[[VFN_TAB]], i32 0 +// LLVM: %[[VFN:.+]] = load ptr, ptr %[[SLOT0]] +// LLVM: call void %[[VFN]](ptr %[[ADJ_THIS]]) +// LLVM: ret void + +// LLVM: define {{.*}}void @_Z1gv() +// LLVM: %[[DF:.+]] = alloca {{.*}} +// LLVM: call void @_ZN12DerivedFinalC1Ev(ptr %[[DF]]) +// LLVM: %[[VPTR2:.+]] = load ptr, ptr %[[DF]] +// LLVM: %[[SLOT0_2:.+]] = getelementptr inbounds ptr, ptr %[[VPTR2]], i32 0 +// LLVM: %[[VFN2:.+]] = load ptr, ptr %[[SLOT0_2]] +// LLVM: call void %[[VFN2]](ptr %[[DF]]) +// LLVM: ret void + +// OGCG: define {{.*}}void @_Z1fv() +// OGCG: %[[D:.+]] = alloca {{.*}} +// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]]) +// OGCG: %[[VTABLE:.+]] = load ptr, ptr %[[D]] +// OGCG: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VTABLE]], i64 -32 +// OGCG: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]] +// OGCG: %[[ADJ_THIS:.+]] = getelementptr inbounds i8, ptr %[[D]], i64 %[[OFF]] +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[ADJ_THIS]]) +// OGCG: ret void + +// OGCG: define {{.*}}void @_Z1gv() +// OGCG: %[[DF:.+]] = alloca {{.*}} +// OGCG: call void @_ZN12DerivedFinalC1Ev(ptr {{.*}} %[[DF]]) +// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[DF]]) +// OGCG: ret void + // Constructor for B // CIR: cir.func comdat linkonce_odr @_ZN1BC1Ev(%arg0: !cir.ptr<!rec_B> // CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init] diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp index 8b5379a..8bca48d 100644 --- a/clang/test/CIR/CodeGen/vector-ext.cpp +++ b/clang/test/CIR/CodeGen/vector-ext.cpp @@ -1322,3 +1322,23 @@ void logical_not() { // OGCG: %[[RESULT:.*]] = icmp eq <4 x i32> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: 
cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp index d8fdeea..f242779 100644 --- a/clang/test/CIR/CodeGen/vector.cpp +++ b/clang/test/CIR/CodeGen/vector.cpp @@ -1390,3 +1390,23 @@ void logical_not_float() { // OGCG: %[[RESULT:.*]] = fcmp oeq <4 x float> %[[TMP_A]], zeroinitializer // OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32> // OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16 + +void unary_extension() { + vi4 a; + vi4 b = __extension__ a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init] +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16 diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir new file mode 100644 index 0000000..727c067 --- /dev/null +++ b/clang/test/CIR/IR/global-init.cir @@ -0,0 +1,48 @@ +// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s + +!u8i = !cir.int<u, 8> + +!rec_NeedsCtor = !cir.record<struct "NeedsCtor" padded {!u8i}> +!rec_NeedsDtor = !cir.record<struct "NeedsDtor" padded {!u8i}> +!rec_NeedsCtorDtor = !cir.record<struct "NeedsCtorDtor" padded {!u8i}> + +module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { + cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>) + cir.global external @needsCtor = ctor : !rec_NeedsCtor { + %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + } + // CHECK: cir.global external @needsCtor = ctor : !rec_NeedsCtor { + // CHECK: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor> + // CHECK: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> () + // CHECK: } + + cir.func private @_ZN9NeedsDtorD1Ev(!cir.ptr<!rec_NeedsDtor>) + cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + } + // CHECK: cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor { + // CHECK: %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor> + // CHECK: cir.call 
@_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> () + // CHECK: } + + cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + } dtor { + %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + } + // CHECK: cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>) + // CHECK: cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } dtor { + // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor> + // CHECK: cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> () + // CHECK: } +} diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp index 5920ced..137a49b 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj( // CHECK-NEXT: entry: @@ -21,6 +24,43 @@ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z29test_non_volatile_parameter64Py( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: 
ret void +// +// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter32PVj( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj( +// 
AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) { __UINT32_TYPE__ res; @@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z25test_volatile_parameter64PVy( +// GCN-NEXT: entry: +// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr +// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr +// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8 +// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy( +// AMDGCNSPIRV-NEXT: entry: +// 
AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) { __UINT64_TYPE__ res; @@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") 
seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() { // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_shared64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_shared64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_shared64() { __attribute__((shared)) 
__UINT64_TYPE__ val; @@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global32() { global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup"); @@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z13test_global64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr 
addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z13test_global64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_global64() { global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup"); @@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, 
align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant32() { __UINT32_TYPE__ local_val; @@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64; // CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z15test_constant64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5) +// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z15test_constant64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8 +// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4) +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_constant64() { __UINT64_TYPE__ local_val; @@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// 
AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() { // CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_order64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val 
to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_order64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast 
(ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_order64() { __attribute__((shared)) __UINT64_TYPE__ val; @@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope32v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") 
seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope32v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope32() { __attribute__((shared)) __UINT32_TYPE__ val; @@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() { // CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr 
addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] // CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 // CHECK-NEXT: ret void +// GCN-LABEL: @_Z12test_scope64v( +// GCN-NEXT: entry: +// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]] +// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8 +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: @_Z12test_scope64v( +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr 
addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]] +// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8 +// AMDGCNSPIRV-NEXT: ret void // __attribute__((device)) void test_scope64() { __attribute__((shared)) __UINT64_TYPE__ val; diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp index 1e977dd..dd1ca45 100644 --- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp +++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp @@ -1,7 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ -// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s +// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s +// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \ +// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s // CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv( // CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -12,6 +15,25 @@ // CHECK-NEXT: fence syncscope("agent") acq_rel // CHECK-NEXT: fence syncscope("workgroup") release // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv( +// GCN-SAME: ) #[[ATTR0:[0-9]+]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst +// GCN-NEXT: fence syncscope("agent") acquire +// GCN-NEXT: fence seq_cst +// GCN-NEXT: fence syncscope("agent") acq_rel +// GCN-NEXT: fence syncscope("workgroup") release +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire +// AMDGCNSPIRV-NEXT: fence seq_cst +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release +// AMDGCNSPIRV-NEXT: ret void // void test_memory_fence_success() { @@ -35,6 +57,25 @@ void test_memory_fence_success() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_localv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// 
AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_local() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -58,6 +99,25 @@ void test_local() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z11test_globalv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]] +// GCN-NEXT: fence seq_cst, !mmra [[META4]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]] +// AMDGCNSPIRV-NEXT: ret void // void test_global() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global"); @@ -80,6 +140,25 @@ void test_global() { // CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] // CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_imagev( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]] +// GCN-NEXT: fence seq_cst, !mmra [[META3]] +// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]] +// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]] +// AMDGCNSPIRV-NEXT: ret void // void test_image() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local"); @@ -99,13 +178,33 @@ void test_image() { // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] // CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] // CHECK-NEXT: ret void +// GCN-LABEL: define dso_local void @_Z10test_mixedv( +// GCN-SAME: ) #[[ATTR0]] { +// GCN-NEXT: entry: +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// GCN-NEXT: ret void +// +// AMDGCNSPIRV-LABEL: define spir_func 
void @_Z10test_mixedv( +// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] { +// AMDGCNSPIRV-NEXT: entry: +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]] +// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]] +// AMDGCNSPIRV-NEXT: ret void // void test_mixed() { __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global"); __builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local"); } -//. // CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} // CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} // CHECK: [[META5]] = !{[[META4]], [[META3]]} //. +// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// GCN: [[META5]] = !{[[META4]], [[META3]]} +//. +// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"} +// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"} +// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]} +//. diff --git a/clang/test/CodeGenCXX/gh56652.cpp b/clang/test/CodeGenCXX/gh56652.cpp new file mode 100644 index 0000000..06a496e --- /dev/null +++ b/clang/test/CodeGenCXX/gh56652.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s + +namespace GH56652{ + +struct foo {}; + +template <typename T> struct bar { + using type = T; + + template <foo> inline static constexpr auto b = true; +}; + +template <typename T> +concept C = requires(T a) { T::template b<foo{}>; }; + +template <typename T> auto fn(T) { + if constexpr (!C<T>) + return foo{}; + else + return T{}; +} + +auto a = decltype(fn(bar<int>{})){}; + +} + +namespace GH116319 { + +template <int = 0> struct a { +template <class> static constexpr auto b = 2; +template <class> static void c() noexcept(noexcept(b<int>)) {} +}; + +void test() { a<>::c<int>(); } + + +} + +// CHECK: %"struct.GH56652::bar" = type { i8 } +// CHECK: $_ZN8GH1163191aILi0EE1cIiEEvv = comdat any +// CHECK: @_ZN7GH566521aE = global %"struct.GH56652::bar" undef diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index 19ab656..7cd3f14 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -1,13 +1,13 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck 
--check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s typedef unsigned int uint; typedef unsigned long ulong; @@ -50,7 +50,8 @@ void test_s_wait_event_export_ready() { } // CHECK-LABEL: @test_global_add_f32 -// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} +// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}} #if !defined(__SPIRV__) void test_global_add_f32(float *rtn, global float *addr, float x) { #else diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl index 5f202ba..6bb20bf 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl @@ -1,9 +1,9 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s #pragma OPENCL EXTENSION cl_khr_fp16 : enable @@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1) // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src 
syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}} #if !defined(__SPIRV__) @@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) { // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}} @@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) { // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}} -// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}} +// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}} // CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 039d032..ab0b0b9 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -1231,7 +1231,8 @@ void 
test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu // CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4 res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup"); - // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4 + // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4 res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent"); // CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4 diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp index 7ffb7aae..8c7a778 100644 --- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp +++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp @@ -1,9 +1,7 @@ -// RUN: %clang_cc1 -std=c++20 %s -verify=cxx20 -// RUN: %clang_cc1 -std=c++23 %s -verify=cxx23 -// RUN: %clang_cc1 -std=c++23 -Wpre-c++23-compat %s -verify=precxx23 -// RUN: %clang_cc1 -std=c++23 -pedantic %s -verify=cxx23 - -//cxx23-no-diagnostics +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++20 %s -verify=cxx20 +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 %s -verify=cxx23 +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -Wpre-c++23-compat %s -verify=precxx23 +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -pedantic %s -verify=cxx23 auto L1 = [] constexpr {}; // cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}} @@ -14,3 +12,25 @@ auto L3 = [] static {}; // cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}} // cxx20-warning@-2 {{static lambdas are a C++23 extension}} // precxx23-warning@-3 {{static lambdas are incompatible with C++ standards before C++23}} + +namespace GH161070 { +void t1() { int a = [] __arm_streaming; } +// precxx23-error@-1 {{'__arm_streaming' cannot be applied to a declaration}} +// precxx23-error@-2 {{expected body of lambda expression}} +// cxx23-error@-3 {{'__arm_streaming' cannot be applied to a declaration}} +// cxx23-error@-4 {{expected body of lambda expression}} +// cxx20-error@-5 {{'__arm_streaming' cannot be applied to a declaration}} +// cxx20-error@-6 {{expected body of lambda expression}} +// cxx20-warning@-7 {{'__arm_streaming' in this position is a C++23 extension}} +// precxx23-warning@-8 {{'__arm_streaming' in this position is incompatible with C++ standards before C++23}} + +void t2() { int a = [] [[assume(true)]]; } +// precxx23-error@-1 {{'assume' attribute cannot be applied to a declaration}} +// precxx23-error@-2 {{expected body of lambda expression}} +// cxx23-error@-3 {{'assume' attribute cannot be applied to a declaration}} +// cxx23-error@-4 {{expected body of lambda expression}} +// cxx20-error@-5 {{'assume' attribute cannot be applied to a declaration}} +// cxx20-error@-6 {{expected body of lambda expression}} +// cxx20-warning@-7 {{an attribute specifier sequence in this position is a C++23 extension}} +// precxx23-warning@-8 {{an attribute specifier sequence in this position is incompatible with C++ standards before C++23}} +} diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp index 739485b..45a4c4c 100644 --- 
a/clang/test/SemaCXX/decltype.cpp +++ b/clang/test/SemaCXX/decltype.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wno-c99-designator %s +// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wno-c99-designator %s // PR5290 int const f0(); @@ -156,6 +157,8 @@ struct A { } }; + + // This shouldn't crash. static_assert(A<int>().f<int>() == 0, ""); // The result should not be dependent. @@ -163,6 +166,81 @@ static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion // expected-note@-1 {{expression evaluates to '0 != 0'}} } + +#if __cplusplus >= 201703L +namespace GH160497 { + +template <class> struct S { + template <class> + inline static auto mem = + [] { static_assert(false); // expected-error {{static assertion failed}} \ + // expected-note {{while substituting into a lambda expression here}} + return 42; + }(); +}; + +using T = decltype(S<void>::mem<void>); + // expected-note@-1 {{in instantiation of static data member 'GH160497::S<void>::mem<void>' requested here}} + + +template <class> struct S2 { + template <class> + inline static auto* mem = + [] { static_assert(false); // expected-error {{static assertion failed}} \ + // expected-note {{while substituting into a lambda expression here}} + return static_cast<int*>(nullptr); + }(); +}; + +using T2 = decltype(S2<void>::mem<void>); +//expected-note@-1 {{in instantiation of static data member 'GH160497::S2<void>::mem<void>' requested here}} + +template <class> struct S3 { + template <class> + inline static int mem = // Check we don't instantiate when the type is not deduced. + [] { static_assert(false); + return 42; + }(); +}; + +using T = decltype(S3<void>::mem<void>); +} + +namespace N1 { + +template<class> +struct S { + template<class> + inline static auto mem = 42; +}; + +using T = decltype(S<void>::mem<void>); + +T y = 42; + +} + +namespace GH161196 { + +template <typename> struct A { + static constexpr int digits = 0; +}; + +template <typename> struct B { + template <int, typename MaskInt = int, int = A<MaskInt>::digits> + static constexpr auto XBitMask = 0; +}; + +struct C { + using ReferenceHost = B<int>; + template <int> static decltype(ReferenceHost::XBitMask<0>) XBitMask; +}; + +void test() { (void)C::XBitMask<0>; } + +} +#endif + template<typename> class conditional { }; diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 0e2758d..e41f4eb 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -420,7 +420,7 @@ public: std::vector<ModuleDeps *> NewMDs; { std::unique_lock<std::mutex> ul(Lock); - for (const ModuleDeps &MD : Graph) { + for (ModuleDeps &MD : Graph) { auto I = Modules.find({MD.ID, 0}); if (I != Modules.end()) { I->first.InputIndex = std::min(I->first.InputIndex, InputIndex); diff --git a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c index 982f3a4..526ba5c 100644 --- a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c +++ b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c @@ -1,24 +1,25 @@ -// XFAIL: target=aarch64-{{.*}}-windows-{{.*}} // RUN: %clang_builtins %s %librt -o %t && %run %t // REQUIRES: librt_has_fixunstfdi #include <stdio.h> +#include "int_lib.h" -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if defined(CRT_HAS_TF_MODE) -#include "int_lib.h" +#define QUAD_PRECISION +#include "fp_lib.h" // Returns: convert a to a unsigned long long, rounding toward zero. // Negative values all become zero. 
-// Assumption: long double is a 128 bit floating point type +// Assumption: fp_t is a 128 bit floating point type // du_int is a 64 bit integral type -// value in long double is representable in du_int or is negative +// value in fp_t is representable in du_int or is negative // (no range checking performed) -COMPILER_RT_ABI du_int __fixunstfdi(long double a); +COMPILER_RT_ABI du_int __fixunstfdi(fp_t a); -int test__fixunstfdi(long double a, du_int expected) +int test__fixunstfdi(fp_t a, du_int expected) { du_int x = __fixunstfdi(a); if (x != expected) @@ -29,13 +30,13 @@ int test__fixunstfdi(long double a, du_int expected) char assumption_1[sizeof(du_int) == 2*sizeof(su_int)] = {0}; char assumption_2[sizeof(du_int)*CHAR_BIT == 64] = {0}; -char assumption_3[sizeof(long double)*CHAR_BIT == 128] = {0}; +char assumption_3[sizeof(fp_t)*CHAR_BIT == 128] = {0}; #endif int main() { -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if defined(CRT_HAS_TF_MODE) if (test__fixunstfdi(0.0, 0)) return 1; diff --git a/compiler-rt/test/builtins/Unit/multc3_test.c b/compiler-rt/test/builtins/Unit/multc3_test.c index e9c99a7..18561cc 100644 --- a/compiler-rt/test/builtins/Unit/multc3_test.c +++ b/compiler-rt/test/builtins/Unit/multc3_test.c @@ -1,24 +1,26 @@ -// XFAIL: target=aarch64-{{.*}}-windows-{{.*}} // RUN: %clang_builtins %s %librt -o %t && %run %t // REQUIRES: librt_has_multc3 #include <stdio.h> +#include "int_lib.h" -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128) + +#define QUAD_PRECISION +#include "fp_lib.h" -#include "int_lib.h" #include <math.h> #include <complex.h> // Returns: the product of a + ib and c + id -COMPILER_RT_ABI long double _Complex -__multc3(long double __a, long double __b, long double __c, long double __d); +COMPILER_RT_ABI Qcomplex +__multc3(fp_t __a, fp_t __b, fp_t __c, fp_t __d); enum {zero, non_zero, inf, NaN, non_zero_nan}; int -classify(long double _Complex x) +classify(Qcomplex x) { if (x == 0) return zero; @@ -41,13 +43,13 @@ classify(long double _Complex x) return non_zero; } -int test__multc3(long double a, long double b, long double c, long double d) +int test__multc3(fp_t a, fp_t b, fp_t c, fp_t d) { - long double _Complex r = __multc3(a, b, c, d); + Qcomplex r = __multc3(a, b, c, d); // printf("test__multc3(%Lf, %Lf, %Lf, %Lf) = %Lf + I%Lf\n", // a, b, c, d, creall(r), cimagl(r)); - long double _Complex dividend; - long double _Complex divisor; + Qcomplex dividend; + Qcomplex divisor; __real__ dividend = a; __imag__ dividend = b; @@ -188,7 +190,7 @@ int test__multc3(long double a, long double b, long double c, long double d) return 0; } -long double x[][2] = +fp_t x[][2] = { { 1.e-6, 1.e-6}, {-1.e-6, 1.e-6}, @@ -348,7 +350,7 @@ long double x[][2] = int main() { -#if _ARCH_PPC || __aarch64__ || __arm64ec__ +#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128) const unsigned N = sizeof(x) / sizeof(x[0]); unsigned i, j; for (i = 0; i < N; ++i) diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h index 010f218..a86cbd8 100644 --- a/libc/src/string/memory_utils/op_generic.h +++ b/libc/src/string/memory_utils/op_generic.h @@ -41,12 +41,22 @@ static_assert((UINTPTR_MAX == 4294967295U) || "We currently only support 32- or 64-bit platforms"); #ifdef LIBC_COMPILER_IS_MSVC - +#ifdef LIBC_TARGET_ARCH_IS_X86 namespace LIBC_NAMESPACE_DECL { using generic_v128 = __m128i; using generic_v256 = __m256i; using generic_v512 = __m512i; } // namespace LIBC_NAMESPACE_DECL +#else +// Special 
handling when the target does not have real vector types.
+// We can potentially use uint8x16_t etc. However, MSVC does not provide a
+// subscript operator for them.
+namespace LIBC_NAMESPACE_DECL {
+struct alignas(16) generic_v128 : public cpp::array<uint8_t, 16> {};
+struct alignas(32) generic_v256 : public cpp::array<uint8_t, 32> {};
+struct alignas(64) generic_v512 : public cpp::array<uint8_t, 64> {};
+} // namespace LIBC_NAMESPACE_DECL
+#endif
 #else
 
 namespace LIBC_NAMESPACE_DECL {
@@ -159,7 +169,8 @@ template <typename T> struct Memset {
 
   LIBC_INLINE static void block(Ptr dst, uint8_t value) {
     if constexpr (is_scalar_v<T> || is_vector_v<T>) {
-      store<T>(dst, splat<T>(value));
+      // Avoid an ambiguous call due to ADL.
+      generic::store<T>(dst, splat<T>(value));
     } else if constexpr (is_array_v<T>) {
       using value_type = typename T::value_type;
       const auto Splat = splat<value_type>(value);
diff --git a/libc/test/UnitTest/FEnvSafeTest.cpp b/libc/test/UnitTest/FEnvSafeTest.cpp
index 2730de3..4393f9d 100644
--- a/libc/test/UnitTest/FEnvSafeTest.cpp
+++ b/libc/test/UnitTest/FEnvSafeTest.cpp
@@ -43,7 +43,7 @@ void FEnvSafeTest::set_fenv(const fenv_t &fenv) {
 
 void FEnvSafeTest::expect_fenv_eq(const fenv_t &before_fenv,
                                   const fenv_t &after_fenv) {
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC)
   using FPState = LIBC_NAMESPACE::fputil::FEnv::FPState;
   const FPState &before_state = reinterpret_cast<const FPState &>(before_fenv);
   const FPState &after_state = reinterpret_cast<const FPState &>(after_fenv);
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 6ea1ea0..566dde6 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -44,6 +44,9 @@ MinGW Improvements
 MachO Improvements
 ------------------
 
+* ``--separate-cstring-literal-sections`` emits cstring literal sections into sections defined by their section name.
+ (`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_) + WebAssembly Improvements ------------------------ diff --git a/lld/test/wasm/archive-export.test b/lld/test/wasm/archive-export.test index 9a76d60..c67e500 100644 --- a/lld/test/wasm/archive-export.test +++ b/lld/test/wasm/archive-export.test @@ -14,6 +14,9 @@ CHECK: Exports: CHECK-NEXT: - Name: memory CHECK-NEXT: Kind: MEMORY CHECK-NEXT: Index: 0 +CHECK-NEXT: - Name: __stack_pointer +CHECK-NEXT: Kind: GLOBAL +CHECK-NEXT: Index: 0 CHECK-NEXT: - Name: foo CHECK-NEXT: Kind: FUNCTION CHECK-NEXT: Index: 1 diff --git a/lld/test/wasm/comdats.ll b/lld/test/wasm/comdats.ll index 8fc301e..2dd687f 100644 --- a/lld/test/wasm/comdats.ll +++ b/lld/test/wasm/comdats.ll @@ -35,6 +35,9 @@ entry: ; CHECK-NEXT: - Name: memory ; CHECK-NEXT: Kind: MEMORY ; CHECK-NEXT: Index: 0 +; CHECK-NEXT: - Name: __stack_pointer +; CHECK-NEXT: Kind: GLOBAL +; CHECK-NEXT: Index: 0 ; CHECK-NEXT: - Name: _start ; CHECK-NEXT: Kind: FUNCTION ; CHECK-NEXT: Index: 1 diff --git a/lld/test/wasm/visibility-hidden.ll b/lld/test/wasm/visibility-hidden.ll index 36c29a8e..6ed7ba3 100644 --- a/lld/test/wasm/visibility-hidden.ll +++ b/lld/test/wasm/visibility-hidden.ll @@ -43,6 +43,9 @@ entry: ; CHECK-NEXT: - Name: memory ; CHECK-NEXT: Kind: MEMORY ; CHECK-NEXT: Index: 0 +; CHECK-NEXT: - Name: __stack_pointer +; CHECK-NEXT: Kind: GLOBAL +; CHECK-NEXT: Index: 0 ; CHECK-NEXT: - Name: objectDefault ; CHECK-NEXT: Kind: FUNCTION ; CHECK-NEXT: Index: 1 diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 9b85b6c..46c848d 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -914,9 +914,10 @@ static InputGlobal *createGlobal(StringRef name, bool isMutable) { return make<InputGlobal>(wasmGlobal, nullptr); } -static GlobalSymbol *createGlobalVariable(StringRef name, bool isMutable) { +static GlobalSymbol *createGlobalVariable(StringRef name, bool isMutable, + uint32_t flags = 0) { InputGlobal *g = createGlobal(name, isMutable); - return symtab->addSyntheticGlobal(name, WASM_SYMBOL_VISIBILITY_HIDDEN, g); + return symtab->addSyntheticGlobal(name, flags, g); } static GlobalSymbol *createOptionalGlobal(StringRef name, bool isMutable) { @@ -966,9 +967,13 @@ static void createSyntheticSymbols() { } if (ctx.arg.sharedMemory) { - ctx.sym.tlsBase = createGlobalVariable("__tls_base", true); - ctx.sym.tlsSize = createGlobalVariable("__tls_size", false); - ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false); + // TLS symbols are all hidden/dso-local + ctx.sym.tlsBase = + createGlobalVariable("__tls_base", true, WASM_SYMBOL_VISIBILITY_HIDDEN); + ctx.sym.tlsSize = createGlobalVariable("__tls_size", false, + WASM_SYMBOL_VISIBILITY_HIDDEN); + ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false, + WASM_SYMBOL_VISIBILITY_HIDDEN); ctx.sym.initTLS = symtab->addSyntheticFunction( "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, make<SyntheticFunction>(is64 ? i64ArgSignature : i32ArgSignature, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index f1e73d7..82e9d86 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -3126,43 +3126,6 @@ void DWARFASTParserClang::ParseSingleMember( if (!member_clang_type.IsCompleteType()) member_clang_type.GetCompleteType(); - { - // Older versions of clang emit the same DWARF for array[0] and array[1]. 
If - // the current field is at the end of the structure, then there is - // definitely no room for extra elements and we override the type to - // array[0]. This was fixed by f454dfb6b5af. - CompilerType member_array_element_type; - uint64_t member_array_size; - bool member_array_is_incomplete; - - if (member_clang_type.IsArrayType(&member_array_element_type, - &member_array_size, - &member_array_is_incomplete) && - !member_array_is_incomplete) { - uint64_t parent_byte_size = - parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, UINT64_MAX); - - // If the attrs.member_byte_offset is still set to UINT32_MAX this means - // that the DW_TAG_member didn't have a DW_AT_data_member_location, so - // don't emit an error if this is the case. - if (attrs.member_byte_offset != UINT32_MAX && - attrs.member_byte_offset >= parent_byte_size) { - if (member_array_size != 1 && - (member_array_size != 0 || - attrs.member_byte_offset > parent_byte_size)) { - module_sp->ReportError( - "{0:x8}: DW_TAG_member '{1}' refers to type {2:x16}" - " which extends beyond the bounds of {3:x8}", - die.GetID(), attrs.name, - attrs.encoding_form.Reference().GetOffset(), parent_die.GetID()); - } - - member_clang_type = - m_ast.CreateArrayType(member_array_element_type, 0, false); - } - } - } - TypeSystemClang::RequireCompleteType(member_clang_type); clang::FieldDecl *field_decl = TypeSystemClang::AddFieldToRecordType( diff --git a/lldb/test/Shell/SymbolFile/DWARF/incomplete-member-beyond-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/incomplete-member-beyond-parent-bounds.yaml new file mode 100644 index 0000000..4e659d0 --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/incomplete-member-beyond-parent-bounds.yaml @@ -0,0 +1,104 @@ +# This is DWARF where we placed an incomplete type +# at an offset that is the parent DW_AT_byte_size. Check +# that we don't report an error in such cases. 
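+#
+# For illustration, the DIEs below roughly model the following hypothetical
+# C++ source (no real compiler output; the member sits exactly at the end of
+# the parent's 4-byte extent):
+#
+#   struct Incomplete;        // external declaration, no DW_AT_byte_size
+#   struct Foo {              // DW_AT_byte_size 0x04
+#     Incomplete mem;         // DW_AT_data_member_location 0x04
+#   };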
+#
+# DW_TAG_compile_unit
+#   DW_AT_name ("main.cpp")
+#   DW_AT_language (DW_LANG_C)
+#
+#   DW_TAG_structure_type
+#     DW_AT_name ("Incomplete")
+#     DW_AT_external (true)
+#
+#   DW_TAG_structure_type
+#     DW_AT_name ("Foo")
+#     DW_AT_byte_size (0x04)
+#
+#     DW_TAG_member
+#       DW_AT_name ("mem")
+#       DW_AT_data_member_location ("0x04")
+#       DW_AT_type (0x00000011 "Incomplete")
+#
+#     NULL
+#
+#   NULL
+
+# RUN: yaml2obj %s > %t
+# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s
+
+# CHECK: Found 1 types:
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_str:
+    - main.cpp
+    - Incomplete
+    - Foo
+    - mem
+  debug_abbrev:
+    - ID: 0
+      Table:
+        - Code: 0x1
+          Tag: DW_TAG_compile_unit
+          Children: DW_CHILDREN_yes
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_language
+              Form: DW_FORM_udata
+        - Code: 0x2
+          Tag: DW_TAG_structure_type
+          Children: DW_CHILDREN_no
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_external
+              Form: DW_FORM_flag_present
+        - Code: 0x3
+          Tag: DW_TAG_structure_type
+          Children: DW_CHILDREN_yes
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_byte_size
+              Form: DW_FORM_data1
+        - Code: 0x4
+          Tag: DW_TAG_member
+          Children: DW_CHILDREN_no
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_type
+              Form: DW_FORM_ref4
+            - Attribute: DW_AT_data_member_location
+              Form: DW_FORM_data1
+  debug_info:
+    - Version: 4
+      AbbrevTableID: 0
+      AbbrOffset: 0x0
+      AddrSize: 8
+      Entries:
+        - AbbrCode: 0x1
+          Values:
+            - Value: 0x0
+            - Value: 0x2
+        - AbbrCode: 0x2
+          Values:
+            - Value: 0x9
+        - AbbrCode: 0x3
+          Values:
+            - Value: 0x14
+            - Value: 0x04
+        - AbbrCode: 0x4
+          Values:
+            - Value: 0x18
+            - Value: 0x11
+            - Value: 0x04
+        - AbbrCode: 0x0
+        - AbbrCode: 0x0
+...
diff --git a/lldb/test/Shell/SymbolFile/DWARF/member-beyond-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/member-beyond-parent-bounds.yaml
new file mode 100644
index 0000000..2ac538e
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/member-beyond-parent-bounds.yaml
@@ -0,0 +1,109 @@
+# This is malformed DWARF where we placed a non-zero sized type
+# at an offset that is larger than the parent DW_AT_byte_size. Check
+# that we report an error in such cases.
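+#
+# Sketch of the hypothetical layout the DIEs below encode: a 4-byte member
+# starting one byte past the end of the 4-byte parent:
+#
+#   struct Foo {              // DW_AT_byte_size 0x04
+#     int mem;                // DW_AT_data_member_location 0x05
+#   };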
+# +# DW_TAG_compile_unit +# DW_AT_name ("main.cpp") +# DW_AT_language (DW_LANG_C) +# +# DW_TAG_base_type +# DW_AT_name ("int") +# DW_AT_encoding (DW_ATE_signed) +# DW_AT_byte_size (0x04) +# +# DW_TAG_structure_type +# DW_AT_name ("Foo") +# DW_AT_byte_size (0x04) +# +# DW_TAG_member +# DW_AT_name ("mem") +# DW_AT_data_member_location ("0x05") +# DW_AT_type (0x00000011 "int") +# +# NULL +# +# NULL + +# RUN: yaml2obj %s > %t +# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s + +# CHECK: Found 1 types: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_str: + - main.cpp + - int + - Foo + - mem + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_language + Form: DW_FORM_udata + - Code: 0x2 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_encoding + Form: DW_FORM_data1 + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Code: 0x3 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Code: 0x4 + Tag: DW_TAG_member + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_data_member_location + Form: DW_FORM_data1 + debug_info: + - Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x0 + - Value: 0x2 + - AbbrCode: 0x2 + Values: + - Value: 0x9 + - Value: 0x5 + - Value: 0x4 + - AbbrCode: 0x3 + Values: + - Value: 0x0d + - Value: 0x04 + - AbbrCode: 0x4 + Values: + - Value: 0x11 + - Value: 0x11 + - Value: 0x05 + - AbbrCode: 0x0 + - AbbrCode: 0x0 +... diff --git a/lldb/test/Shell/SymbolFile/DWARF/member-on-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/member-on-parent-bounds.yaml new file mode 100644 index 0000000..736697c --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/member-on-parent-bounds.yaml @@ -0,0 +1,109 @@ +# This is malformed DWARF where we placed a non-zero sized type +# at an offset that is the parent DW_AT_byte_size. Check +# that we report an error in such cases. 
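+#
+# Illustrative layout for the DIEs below: a 4-byte member placed exactly at
+# the parent's DW_AT_byte_size, so it extends past the parent's extent:
+#
+#   struct Foo {              // DW_AT_byte_size 0x04
+#     int mem;                // DW_AT_data_member_location 0x04
+#   };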
+# +# DW_TAG_compile_unit +# DW_AT_name ("main.cpp") +# DW_AT_language (DW_LANG_C) +# +# DW_TAG_base_type +# DW_AT_name ("int") +# DW_AT_encoding (DW_ATE_signed) +# DW_AT_byte_size (0x04) +# +# DW_TAG_structure_type +# DW_AT_name ("Foo") +# DW_AT_byte_size (0x04) +# +# DW_TAG_member +# DW_AT_name ("mem") +# DW_AT_data_member_location ("0x04") +# DW_AT_type (0x00000011 "int") +# +# NULL +# +# NULL + +# RUN: yaml2obj %s > %t +# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s + +# CHECK: Found 1 types: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +DWARF: + debug_str: + - main.cpp + - int + - Foo + - mem + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_language + Form: DW_FORM_udata + - Code: 0x2 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_encoding + Form: DW_FORM_data1 + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Code: 0x3 + Tag: DW_TAG_structure_type + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Code: 0x4 + Tag: DW_TAG_member + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_type + Form: DW_FORM_ref4 + - Attribute: DW_AT_data_member_location + Form: DW_FORM_data1 + debug_info: + - Version: 4 + AbbrevTableID: 0 + AbbrOffset: 0x0 + AddrSize: 8 + Entries: + - AbbrCode: 0x1 + Values: + - Value: 0x0 + - Value: 0x2 + - AbbrCode: 0x2 + Values: + - Value: 0x9 + - Value: 0x5 + - Value: 0x4 + - AbbrCode: 0x3 + Values: + - Value: 0x0d + - Value: 0x04 + - AbbrCode: 0x4 + Values: + - Value: 0x11 + - Value: 0x11 + - Value: 0x04 + - AbbrCode: 0x0 + - AbbrCode: 0x0 +... diff --git a/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml b/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml index fbdc626..1d1e129 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml +++ b/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml @@ -48,14 +48,10 @@ # RUN: yaml2obj %s > %t # RUN: lldb-test symbols --name=UnionType --find=type %t > %t.stdout # RUN: cat %t.stdout | FileCheck --check-prefix=STDOUT %s -# RUN: lldb-test symbols --name=UnionType --find=type %t 2> %t.stderr -# RUN: cat %t.stderr | FileCheck --allow-empty --check-prefix=STDERR %s # STDOUT: Found 1 types: # STDOUT: {{(0x)?[0-9a-fA-F]+}}: Type{0x0000002b} , name = "UnionType", size = 32, compiler_type = 0x{{[0-9a-fA-F]+}} union UnionType { -# STDERR-NOT: error: union-types-no-member-location.yaml.tmp 0x00000031: DW_TAG_member 'array' refers to type 0x000000000000001f which extends beyond the bounds of 0x0000002b - --- !ELF FileHeader: Class: ELFCLASS64 diff --git a/lldb/test/Shell/SymbolFile/DWARF/zero-sized-member-in-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/zero-sized-member-in-parent-bounds.yaml new file mode 100644 index 0000000..a98f62c --- /dev/null +++ b/lldb/test/Shell/SymbolFile/DWARF/zero-sized-member-in-parent-bounds.yaml @@ -0,0 +1,105 @@ +# This is DWARF where we placed a zero-sized type +# at an offset that is the parent DW_AT_byte_size. Check +# that we don't report an error in such cases. 
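+#
+# Illustrative layout for the DIEs below: the zero-sized member occupies no
+# storage, so an offset equal to the parent's DW_AT_byte_size stays in
+# bounds:
+#
+#   struct Bar {};            // DW_AT_byte_size 0x00 (hypothetical DWARF)
+#   struct Foo {              // DW_AT_byte_size 0x04
+#     Bar mem;                // DW_AT_data_member_location 0x04
+#   };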
+#
+# DW_TAG_compile_unit
+#   DW_AT_name ("main.cpp")
+#   DW_AT_language (DW_LANG_C)
+#
+#   DW_TAG_structure_type
+#     DW_AT_name ("Bar")
+#     DW_AT_byte_size (0x00)
+#
+#   DW_TAG_structure_type
+#     DW_AT_name ("Foo")
+#     DW_AT_byte_size (0x04)
+#
+#     DW_TAG_member
+#       DW_AT_name ("mem")
+#       DW_AT_data_member_location ("0x04")
+#       DW_AT_type (0x00000011 "Bar")
+#
+#     NULL
+#
+#   NULL
+
+# RUN: yaml2obj %s > %t
+# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s
+
+# CHECK: Found 1 types:
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_EXEC
+  Machine: EM_X86_64
+DWARF:
+  debug_str:
+    - main.cpp
+    - Bar
+    - Foo
+    - mem
+  debug_abbrev:
+    - ID: 0
+      Table:
+        - Code: 0x1
+          Tag: DW_TAG_compile_unit
+          Children: DW_CHILDREN_yes
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_language
+              Form: DW_FORM_udata
+        - Code: 0x2
+          Tag: DW_TAG_structure_type
+          Children: DW_CHILDREN_no
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_byte_size
+              Form: DW_FORM_data1
+        - Code: 0x3
+          Tag: DW_TAG_structure_type
+          Children: DW_CHILDREN_yes
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_byte_size
+              Form: DW_FORM_data1
+        - Code: 0x4
+          Tag: DW_TAG_member
+          Children: DW_CHILDREN_no
+          Attributes:
+            - Attribute: DW_AT_name
+              Form: DW_FORM_strp
+            - Attribute: DW_AT_type
+              Form: DW_FORM_ref4
+            - Attribute: DW_AT_data_member_location
+              Form: DW_FORM_data1
+  debug_info:
+    - Version: 4
+      AbbrevTableID: 0
+      AbbrOffset: 0x0
+      AddrSize: 8
+      Entries:
+        - AbbrCode: 0x1
+          Values:
+            - Value: 0x0
+            - Value: 0x2
+        - AbbrCode: 0x2
+          Values:
+            - Value: 0x9
+            - Value: 0x0
+        - AbbrCode: 0x3
+          Values:
+            - Value: 0x0d
+            - Value: 0x04
+        - AbbrCode: 0x4
+          Values:
+            - Value: 0x11
+            - Value: 0x11
+            - Value: 0x04
+        - AbbrCode: 0x0
+        - AbbrCode: 0x0
+...
diff --git a/llvm/docs/CommandGuide/llvm-readelf.rst b/llvm/docs/CommandGuide/llvm-readelf.rst
index 284c3aa4..5403fea 100644
--- a/llvm/docs/CommandGuide/llvm-readelf.rst
+++ b/llvm/docs/CommandGuide/llvm-readelf.rst
@@ -143,6 +143,10 @@ OPTIONS
 
  Display all notes.
 
+.. option:: --offloading
+
+ Display the list of HIP offload bundles.
+
 .. option:: --pretty-print
 
  When used with :option:`--elf-output-style`, JSON output will be formatted in
diff --git a/llvm/docs/CommandGuide/llvm-readobj.rst b/llvm/docs/CommandGuide/llvm-readobj.rst
index 8bd29ea..0d05b94 100644
--- a/llvm/docs/CommandGuide/llvm-readobj.rst
+++ b/llvm/docs/CommandGuide/llvm-readobj.rst
@@ -104,6 +104,10 @@ file formats.
 
  Do not demangle symbol names in the output. This option is only for ELF and
  XCOFF file formats. The option is enabled by default.
 
+.. option:: --offloading
+
+ Display the list of HIP offload bundles.
+
 .. option:: --relocations, --relocs, -r
 
  Display the relocation entries in the file.
diff --git a/llvm/include/llvm/CAS/FileOffset.h b/llvm/include/llvm/CAS/FileOffset.h
new file mode 100644
index 0000000..21d045e
--- /dev/null
+++ b/llvm/include/llvm/CAS/FileOffset.h
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares the interface for FileOffset, which represents stored
+/// data at an offset from the beginning of a file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_FILEOFFSET_H
+#define LLVM_CAS_FILEOFFSET_H
+
+#include <cstdlib>
+
+namespace llvm::cas {
+
+/// FileOffset is a wrapper around `uint64_t` to represent the offset of data
+/// from the beginning of the file.
+class FileOffset {
+public:
+  uint64_t get() const { return Offset; }
+
+  explicit operator bool() const { return Offset; }
+
+  FileOffset() = default;
+  explicit FileOffset(uint64_t Offset) : Offset(Offset) {}
+
+private:
+  uint64_t Offset = 0;
+};
+
+} // namespace llvm::cas
+
+#endif // LLVM_CAS_FILEOFFSET_H
diff --git a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h
new file mode 100644
index 0000000..5e41bf6
--- /dev/null
+++ b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h
@@ -0,0 +1,236 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares the interface for OnDiskTrieRawHashMap, a thread-safe
+/// and (mostly) lock-free hash map stored as a trie and backed by persistent
+/// files on disk.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_ONDISKTRIERAWHASHMAP_H
+#define LLVM_CAS_ONDISKTRIERAWHASHMAP_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/FileOffset.h"
+#include "llvm/Support/Error.h"
+#include <optional>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace cas {
+
+/// OnDiskTrieRawHashMap is a persistent trie data structure used as a hash
+/// map. The keys are fixed length, and are expected to be binary hashes with
+/// a normal distribution.
+///
+/// - Thread-safety is achieved through the use of atomics within a shared
+///   memory mapping. Atomic access does not work on networked filesystems.
+/// - Filesystem locks are used, but only sparingly:
+///   - during initialization, for creating / opening an existing store;
+///   - for the lifetime of the instance, a shared/reader lock is held;
+///   - during destruction, if there are no concurrent readers, to shrink the
+///     files to their minimum size.
+/// - Path is used as a directory:
+///   - "index" stores the root trie and subtries.
+///   - "data" stores (most of) the entries, like a bump-ptr-allocator.
+///   - Large entries are stored externally in a file named by the key.
+/// - Code is system-dependent and the binary format itself is not portable.
+///   These are not artifacts that can/should be moved between different
+///   systems; they are only appropriate for local storage.
+class OnDiskTrieRawHashMap {
+public:
+  LLVM_DUMP_METHOD void dump() const;
+  void
+  print(raw_ostream &OS,
+        function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
+
+public:
+  /// Const value proxy to access the records stored in TrieRawHashMap.
+  struct ConstValueProxy {
+    ConstValueProxy() = default;
+    ConstValueProxy(ArrayRef<uint8_t> Hash, ArrayRef<char> Data)
+        : Hash(Hash), Data(Data) {}
+    ConstValueProxy(ArrayRef<uint8_t> Hash, StringRef Data)
+        : Hash(Hash), Data(Data.begin(), Data.size()) {}
+
+    ArrayRef<uint8_t> Hash;
+    ArrayRef<char> Data;
+  };
+
+  /// Value proxy to access the records stored in TrieRawHashMap.
+  struct ValueProxy {
+    operator ConstValueProxy() const { return ConstValueProxy(Hash, Data); }
+
+    ValueProxy() = default;
+    ValueProxy(ArrayRef<uint8_t> Hash, MutableArrayRef<char> Data)
+        : Hash(Hash), Data(Data) {}
+
+    ArrayRef<uint8_t> Hash;
+    MutableArrayRef<char> Data;
+  };
+
+  /// Validate the trie data structure.
+  ///
+  /// Callback receives the file offset to the data entry and the data stored.
+  Error validate(
+      function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const;
+
+  /// Check the valid range of file offset for OnDiskTrieRawHashMap.
+  static bool validOffset(FileOffset Offset) {
+    return Offset.get() < (1LL << 48);
+  }
+
+public:
+  /// Template class to implement a `pointer` type into the trie data
+  /// structure.
+  ///
+  /// It provides pointer-like operations, e.g., dereferencing to get the
+  /// underlying data. It also reserves the top 16 bits of the pointer value,
+  /// which can be used to pack additional information if needed.
+  template <class ProxyT> class PointerImpl {
+  public:
+    FileOffset getOffset() const {
+      return FileOffset(OffsetLow32 | (uint64_t)OffsetHigh16 << 32);
+    }
+
+    explicit operator bool() const { return IsValue; }
+
+    const ProxyT &operator*() const {
+      assert(IsValue);
+      return Value;
+    }
+    const ProxyT *operator->() const {
+      assert(IsValue);
+      return &Value;
+    }
+
+    PointerImpl() = default;
+
+  protected:
+    PointerImpl(ProxyT Value, FileOffset Offset, bool IsValue = true)
+        : Value(Value), OffsetLow32((uint64_t)Offset.get()),
+          OffsetHigh16((uint64_t)Offset.get() >> 32), IsValue(IsValue) {
+      if (IsValue)
+        assert(validOffset(Offset));
+    }
+
+    ProxyT Value;
+    uint32_t OffsetLow32 = 0;
+    uint16_t OffsetHigh16 = 0;
+
+    // True if it points to a value (not a "nullptr"). Use an extra field
+    // because 0 can be a valid offset.
+    bool IsValue = false;
+  };
+
+  class pointer;
+  class const_pointer : public PointerImpl<ConstValueProxy> {
+  public:
+    const_pointer() = default;
+
+  private:
+    friend class pointer;
+    friend class OnDiskTrieRawHashMap;
+    using const_pointer::PointerImpl::PointerImpl;
+  };
+
+  class pointer : public PointerImpl<ValueProxy> {
+  public:
+    operator const_pointer() const {
+      return const_pointer(Value, getOffset(), IsValue);
+    }
+
+    pointer() = default;
+
+  private:
+    friend class OnDiskTrieRawHashMap;
+    using pointer::PointerImpl::PointerImpl;
+  };
+
+  /// Find the value from hash.
+  ///
+  /// \returns a pointer to the value if it exists, otherwise a non-value
+  /// pointer that evaluates to `false` when converted to boolean.
+  const_pointer find(ArrayRef<uint8_t> Hash) const;
+
+  /// Helper function to recover a pointer into the trie from a file offset.
+  Expected<const_pointer> recoverFromFileOffset(FileOffset Offset) const;
+
+  using LazyInsertOnConstructCB =
+      function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue)>;
+  using LazyInsertOnLeakCB =
+      function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue,
+                        FileOffset FinalOffset, ValueProxy FinalValue)>;
+
+  /// Insert lazily.
+  ///
+  /// \p OnConstruct is called when ready to insert a value, after allocating
+  /// space for the data. It is called at most once.
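+  ///
+  /// A minimal usage sketch (hypothetical caller; assumes `Map` is an open
+  /// OnDiskTrieRawHashMap and `Hash` a key of the configured width):
+  /// \code
+  ///   Expected<pointer> P = Map.insertLazy(
+  ///       Hash, /*OnConstruct=*/[](FileOffset, ValueProxy V) {
+  ///         // Fill the freshly allocated record data.
+  ///         std::fill(V.Data.begin(), V.Data.end(), 0);
+  ///       });
+  /// \endcode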
+ /// + /// \p OnLeak is called only if \p OnConstruct has been called and a race + /// occurred before insertion, causing the tentative offset and data to be + /// abandoned. This allows clients to clean up other results or update any + /// references. + /// + /// NOTE: Does *not* guarantee that \p OnConstruct is only called on success. + /// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize + /// simultaneous writes, but that seems dangerous to use in a memory-mapped + /// file in case a process crashes in the busy state. + Expected<pointer> insertLazy(ArrayRef<uint8_t> Hash, + LazyInsertOnConstructCB OnConstruct = nullptr, + LazyInsertOnLeakCB OnLeak = nullptr); + + Expected<pointer> insert(const ConstValueProxy &Value) { + return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) { + assert(Allocated.Hash == Value.Hash); + assert(Allocated.Data.size() == Value.Data.size()); + llvm::copy(Value.Data, Allocated.Data.begin()); + }); + } + + size_t size() const; + size_t capacity() const; + + /// Gets or creates a file at \p Path with a hash-mapped trie named \p + /// TrieName. The hash size is \p NumHashBits (in bits) and the records store + /// data of size \p DataSize (in bytes). + /// + /// \p MaxFileSize controls the maximum file size to support, limiting the + /// size of the \a mapped_file_region. \p NewFileInitialSize is the starting + /// size if a new file is created. + /// + /// \p NewTableNumRootBits and \p NewTableNumSubtrieBits are hints to + /// configure the trie, if it doesn't already exist. + /// + /// \pre NumHashBits is a multiple of 8 (byte-aligned). + static Expected<OnDiskTrieRawHashMap> + create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, + uint64_t DataSize, uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, + std::optional<size_t> NewTableNumRootBits = std::nullopt, + std::optional<size_t> NewTableNumSubtrieBits = std::nullopt); + + OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS); + OnDiskTrieRawHashMap &operator=(OnDiskTrieRawHashMap &&RHS); + ~OnDiskTrieRawHashMap(); + +private: + struct ImplType; + explicit OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl); + std::unique_ptr<ImplType> Impl; +}; + +} // namespace cas +} // namespace llvm + +#endif // LLVM_CAS_ONDISKTRIERAWHASHMAP_H diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index de9675f..e97160e 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -185,6 +185,14 @@ inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) { LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName); +/// Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch +/// weights in the new instruction if the parent function of the original +/// instruction has an entry count. This is to not confuse users by injecting +/// profile data into non-profiled functions. +LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, + Function &F, + StringRef PassName); + /// Analogous to setExplicitlyUnknownBranchWeights, but for functions and their /// entry counts. 
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F, diff --git a/llvm/include/llvm/Object/OffloadBundle.h b/llvm/include/llvm/Object/OffloadBundle.h index f4d5a1d..18be62b 100644 --- a/llvm/include/llvm/Object/OffloadBundle.h +++ b/llvm/include/llvm/Object/OffloadBundle.h @@ -161,7 +161,7 @@ public: OffsetStr.getAsInteger(10, O); Str = Str.drop_front(OffsetStr.size()); - if (Str.consume_front("&size=")) + if (!Str.consume_front("&size=")) return createStringError(object_error::parse_failed, "Reading 'size' in URI"); diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index fa313f5..d6c2d7f 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -64,6 +64,8 @@ protected: /// A worklist of the instructions that need to be simplified. InstructionWorklist &Worklist; + Function &F; + // Mode in which we are running the combiner. const bool MinimizeSize; @@ -98,17 +100,17 @@ protected: bool ComputedBackEdges = false; public: - InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder, - bool MinimizeSize, AAResults *AA, AssumptionCache &AC, - TargetLibraryInfo &TLI, TargetTransformInfo &TTI, - DominatorTree &DT, OptimizationRemarkEmitter &ORE, - BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, - ProfileSummaryInfo *PSI, const DataLayout &DL, + InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder, Function &F, + AAResults *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, + TargetTransformInfo &TTI, DominatorTree &DT, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, + const DataLayout &DL, ReversePostOrderTraversal<BasicBlock *> &RPOT) : TTIForTargetIntrinsicsOnly(TTI), Builder(Builder), Worklist(Worklist), - MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), - SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true, - /*CanUseUndef*/ true, &DC), + F(F), MinimizeSize(F.hasMinSize()), AA(AA), AC(AC), TLI(TLI), DT(DT), + DL(DL), SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true, + /*CanUseUndef*/ true, &DC), ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), RPOT(RPOT) {} virtual ~InstCombiner() = default; diff --git a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h index 4d3ead29..f53cee2 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h @@ -20,11 +20,15 @@ namespace llvm { /// The gcov-style instrumentation pass class GCOVProfilerPass : public PassInfoMixin<GCOVProfilerPass> { public: - GCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()) : GCOVOpts(Options) { } + GCOVProfilerPass( + const GCOVOptions &Options = GCOVOptions::getDefault(), + IntrusiveRefCntPtr<vfs::FileSystem> VFS = vfs::getRealFileSystem()) + : GCOVOpts(Options), VFS(std::move(VFS)) {} LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: GCOVOptions GCOVOpts; + IntrusiveRefCntPtr<vfs::FileSystem> VFS; }; } // namespace llvm diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt index 6ed724b..7ae5f7e 100644 --- a/llvm/lib/CAS/CMakeLists.txt +++ b/llvm/lib/CAS/CMakeLists.txt @@ -2,14 +2,19 @@ add_llvm_component_library(LLVMCAS ActionCache.cpp ActionCaches.cpp BuiltinCAS.cpp + DatabaseFile.cpp InMemoryCAS.cpp MappedFileRegionArena.cpp ObjectStore.cpp 
  OnDiskCommon.cpp
+  OnDiskTrieRawHashMap.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
 
+  LINK_LIBS
+  ${LLVM_PTHREAD_LIB}
+
   LINK_COMPONENTS
   Support
   )
diff --git a/llvm/lib/CAS/DatabaseFile.cpp b/llvm/lib/CAS/DatabaseFile.cpp
new file mode 100644
index 0000000..db8ce1d
--- /dev/null
+++ b/llvm/lib/CAS/DatabaseFile.cpp
@@ -0,0 +1,123 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file implements the common abstractions for the CAS database
+/// file.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DatabaseFile.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+Error ondisk::createTableConfigError(std::errc ErrC, StringRef Path,
+                                     StringRef TableName, const Twine &Msg) {
+  return createStringError(make_error_code(ErrC),
+                           Path + "[" + TableName + "]: " + Msg);
+}
+
+Error ondisk::checkTable(StringRef Label, size_t Expected, size_t Observed,
+                         StringRef Path, StringRef TrieName) {
+  if (Expected == Observed)
+    return Error::success();
+  return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+                                "mismatched " + Label +
+                                    " (expected: " + Twine(Expected) +
+                                    ", observed: " + Twine(Observed) + ")");
+}
+
+Expected<DatabaseFile>
+DatabaseFile::create(const Twine &Path, uint64_t Capacity,
+                     function_ref<Error(DatabaseFile &)> NewDBConstructor) {
+  // Constructor to run if the file doesn't exist yet.
+  auto NewFileConstructor = [&](MappedFileRegionArena &Alloc) -> Error {
+    if (Alloc.capacity() <
+        sizeof(Header) + sizeof(MappedFileRegionArena::Header))
+      return createTableConfigError(std::errc::argument_out_of_domain,
+                                    Path.str(), "datafile",
+                                    "Allocator too small for header");
+    (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}};
+    DatabaseFile DB(Alloc);
+    return NewDBConstructor(DB);
+  };
+
+  // Get or create the file.
+  MappedFileRegionArena Alloc;
+  if (Error E = MappedFileRegionArena::create(Path, Capacity, sizeof(Header),
+                                              NewFileConstructor)
+                    .moveInto(Alloc))
+    return std::move(E);
+
+  return DatabaseFile::get(
+      std::make_unique<MappedFileRegionArena>(std::move(Alloc)));
+}
+
+Error DatabaseFile::addTable(TableHandle Table) {
+  assert(Table);
+  assert(&Table.getRegion() == &getRegion());
+  int64_t ExistingRootOffset = 0;
+  const int64_t NewOffset =
+      reinterpret_cast<const char *>(&Table.getHeader()) - getRegion().data();
+  if (H->RootTableOffset.compare_exchange_strong(ExistingRootOffset,
+                                                 NewOffset))
+    return Error::success();
+
+  // Silently ignore attempts to set the root to itself.
+  if (ExistingRootOffset == NewOffset)
+    return Error::success();
+
+  // Return a proper error message.
+ TableHandle Root(getRegion(), ExistingRootOffset); + if (Root.getName() == Table.getName()) + return createStringError( + make_error_code(std::errc::not_supported), + "collision with existing table of the same name '" + Table.getName() + + "'"); + + return createStringError(make_error_code(std::errc::not_supported), + "cannot add new table '" + Table.getName() + + "'" + " to existing root '" + + Root.getName() + "'"); +} + +std::optional<TableHandle> DatabaseFile::findTable(StringRef Name) { + int64_t RootTableOffset = H->RootTableOffset.load(); + if (!RootTableOffset) + return std::nullopt; + + TableHandle Root(getRegion(), RootTableOffset); + if (Root.getName() == Name) + return Root; + + return std::nullopt; +} + +Error DatabaseFile::validate(MappedFileRegion &Region) { + if (Region.size() < sizeof(Header)) + return createStringError(std::errc::invalid_argument, + "database: missing header"); + + // Check the magic and version. + auto *H = reinterpret_cast<Header *>(Region.data()); + if (H->Magic != getMagic()) + return createStringError(std::errc::invalid_argument, + "database: bad magic"); + if (H->Version != getVersion()) + return createStringError(std::errc::invalid_argument, + "database: wrong version"); + + auto *MFH = reinterpret_cast<MappedFileRegionArena::Header *>(Region.data() + + sizeof(Header)); + // Check the bump-ptr, which should point past the header. + if (MFH->BumpPtr.load() < (int64_t)sizeof(Header)) + return createStringError(std::errc::invalid_argument, + "database: corrupt bump-ptr"); + + return Error::success(); +} diff --git a/llvm/lib/CAS/DatabaseFile.h b/llvm/lib/CAS/DatabaseFile.h new file mode 100644 index 0000000..609e5f13 --- /dev/null +++ b/llvm/lib/CAS/DatabaseFile.h @@ -0,0 +1,153 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares the common interface for a DatabaseFile that is used to +/// implement OnDiskCAS. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CAS_DATABASEFILE_H +#define LLVM_LIB_CAS_DATABASEFILE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/CAS/MappedFileRegionArena.h" +#include "llvm/Support/Error.h" + +namespace llvm::cas::ondisk { + +using MappedFileRegion = MappedFileRegionArena::RegionT; + +/// Generic handle for a table. +/// +/// Generic table header layout: +/// - 2-bytes: TableKind +/// - 2-bytes: TableNameSize +/// - 4-bytes: TableNameRelOffset (relative to header) +class TableHandle { +public: + enum class TableKind : uint16_t { + TrieRawHashMap = 1, + DataAllocator = 2, + }; + struct Header { + TableKind Kind; + uint16_t NameSize; + int32_t NameRelOffset; ///< Relative to Header. 
+ }; + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + MappedFileRegion &getRegion() const { return *Region; } + + template <class T> static void check() { + static_assert( + std::is_same<decltype(T::Header::GenericHeader), Header>::value, + "T::GenericHeader should be of type TableHandle::Header"); + static_assert(offsetof(typename T::Header, GenericHeader) == 0, + "T::GenericHeader must be the head of T::Header"); + } + template <class T> bool is() const { return T::Kind == H->Kind; } + template <class T> T dyn_cast() const { + check<T>(); + if (is<T>()) + return T(*Region, *reinterpret_cast<typename T::Header *>(H)); + return T(); + } + template <class T> T cast() const { + assert(is<T>()); + return dyn_cast<T>(); + } + + StringRef getName() const { + auto *Begin = reinterpret_cast<const char *>(H) + H->NameRelOffset; + return StringRef(Begin, H->NameSize); + } + + TableHandle() = default; + TableHandle(MappedFileRegion &Region, Header &H) : Region(&Region), H(&H) {} + TableHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : TableHandle(Region, + *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +/// Encapsulate a database file, which: +/// - Sets/checks magic. +/// - Sets/checks version. +/// - Points at an arbitrary root table. +/// - Sets up a MappedFileRegionArena for allocation. +/// +/// Top-level layout: +/// - 4-bytes: Magic +/// - 4-bytes: Version +/// - 8-bytes: RootTableOffset (16-bits: Kind; 48-bits: Offset) +/// - 8-bytes: BumpPtr from MappedFileRegionArena +class DatabaseFile { +public: + static constexpr uint32_t getMagic() { return 0xDA7ABA53UL; } + static constexpr uint32_t getVersion() { return 1UL; } + struct Header { + uint32_t Magic; + uint32_t Version; + std::atomic<int64_t> RootTableOffset; + }; + + const Header &getHeader() { return *H; } + MappedFileRegionArena &getAlloc() { return Alloc; } + MappedFileRegion &getRegion() { return Alloc.getRegion(); } + + /// Add a table. This is currently not thread safe and should be called inside + /// NewDBConstructor. + Error addTable(TableHandle Table); + + /// Find a table. May return null. + std::optional<TableHandle> findTable(StringRef Name); + + /// Create the DatabaseFile at Path with Capacity. 
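+  ///
+  /// A minimal sketch (hypothetical; `RootTable` stands in for a handle to a
+  /// concrete table, such as a TrieRawHashMap table created inside the
+  /// callback):
+  /// \code
+  ///   auto DB = DatabaseFile::create(
+  ///       Path, /*Capacity=*/4ull << 30, [&](DatabaseFile &DB) -> Error {
+  ///         // Called only when the file is first created.
+  ///         return DB.addTable(RootTable);
+  ///       });
+  /// \endcode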
+  static Expected<DatabaseFile>
+  create(const Twine &Path, uint64_t Capacity,
+         function_ref<Error(DatabaseFile &)> NewDBConstructor);
+
+  size_t size() const { return Alloc.size(); }
+
+private:
+  static Expected<DatabaseFile>
+  get(std::unique_ptr<MappedFileRegionArena> Alloc) {
+    if (Error E = validate(Alloc->getRegion()))
+      return std::move(E);
+    return DatabaseFile(std::move(Alloc));
+  }
+
+  static Error validate(MappedFileRegion &Region);
+
+  DatabaseFile(MappedFileRegionArena &Alloc)
+      : H(reinterpret_cast<Header *>(Alloc.data())), Alloc(Alloc) {}
+  DatabaseFile(std::unique_ptr<MappedFileRegionArena> Alloc)
+      : DatabaseFile(*Alloc) {
+    OwnedAlloc = std::move(Alloc);
+  }
+
+  Header *H = nullptr;
+  MappedFileRegionArena &Alloc;
+  std::unique_ptr<MappedFileRegionArena> OwnedAlloc;
+};
+
+Error createTableConfigError(std::errc ErrC, StringRef Path,
+                             StringRef TableName, const Twine &Msg);
+
+Error checkTable(StringRef Label, size_t Expected, size_t Observed,
+                 StringRef Path, StringRef TrieName);
+
+} // namespace llvm::cas::ondisk
+
+#endif
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
new file mode 100644
index 0000000..9b382dd7
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -0,0 +1,1178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Implements OnDiskTrieRawHashMap.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskTrieRawHashMap.h"
+#include "DatabaseFile.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/TrieHashIndexGenerator.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+//===----------------------------------------------------------------------===//
+// TrieRawHashMap data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class SubtrieHandle;
+class TrieRawHashMapHandle;
+class TrieVisitor;
+
+/// A value stored in the slots inside a SubTrie. A stored value can either be
+/// a subtrie (encoded after negation), which is the file offset to another
+/// subtrie, or it can be a file offset to a DataRecord.
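+///
+/// For example (illustrative raw values): 88 denotes a DataRecord at file
+/// offset 88, -4096 denotes a subtrie whose header is at file offset 4096,
+/// and 0 denotes an empty slot.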
+class SubtrieSlotValue {
+public:
+  explicit operator bool() const { return !isEmpty(); }
+  bool isEmpty() const { return !Offset; }
+  bool isData() const { return Offset > 0; }
+  bool isSubtrie() const { return Offset < 0; }
+  uint64_t asData() const {
+    assert(isData());
+    return Offset;
+  }
+  uint64_t asSubtrie() const {
+    assert(isSubtrie());
+    return -Offset;
+  }
+
+  FileOffset asSubtrieFileOffset() const { return FileOffset(asSubtrie()); }
+
+  FileOffset asDataFileOffset() const { return FileOffset(asData()); }
+
+  int64_t getRawOffset() const { return Offset; }
+
+  static SubtrieSlotValue getDataOffset(int64_t Offset) {
+    return SubtrieSlotValue(Offset);
+  }
+
+  static SubtrieSlotValue getSubtrieOffset(int64_t Offset) {
+    return SubtrieSlotValue(-Offset);
+  }
+
+  static SubtrieSlotValue getDataOffset(FileOffset Offset) {
+    return getDataOffset(Offset.get());
+  }
+
+  static SubtrieSlotValue getSubtrieOffset(FileOffset Offset) {
+    return getSubtrieOffset(Offset.get());
+  }
+
+  static SubtrieSlotValue getFromSlot(std::atomic<int64_t> &Slot) {
+    return SubtrieSlotValue(Slot.load());
+  }
+
+  SubtrieSlotValue() = default;
+
+private:
+  friend class SubtrieHandle;
+  explicit SubtrieSlotValue(int64_t Offset) : Offset(Offset) {}
+  int64_t Offset = 0;
+};
+
+/// Subtrie layout:
+/// - 2-bytes: StartBit
+/// - 1-byte:  NumBits=lg(num-slots)
+/// - 5-bytes: 0-pad
+/// - <slots>
+class SubtrieHandle {
+public:
+  struct Header {
+    /// The bit this subtrie starts on.
+    uint16_t StartBit;
+
+    /// The number of bits this subtrie handles. It has 2^NumBits slots.
+    uint8_t NumBits;
+
+    /// 0-pad to 8B.
+    uint8_t ZeroPad1B;
+    uint32_t ZeroPad4B;
+  };
+
+  /// Slot storage:
+  /// - zero: Empty
+  /// - positive: RecordOffset
+  /// - negative: SubtrieOffset
+  using SlotT = std::atomic<int64_t>;
+
+  static int64_t getSlotsSize(uint32_t NumBits) {
+    return sizeof(int64_t) * (1u << NumBits);
+  }
+
+  static int64_t getSize(uint32_t NumBits) {
+    return sizeof(SubtrieHandle::Header) + getSlotsSize(NumBits);
+  }
+
+  int64_t getSize() const { return getSize(H->NumBits); }
+  size_t getNumSlots() const { return Slots.size(); }
+
+  SubtrieSlotValue load(size_t I) const {
+    return SubtrieSlotValue(Slots[I].load());
+  }
+  void store(size_t I, SubtrieSlotValue V) {
+    return Slots[I].store(V.getRawOffset());
+  }
+
+  void printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const;
+
+  /// Return true on success. On failure, \p Expected is updated to the value
+  /// found in the slot.
+  bool compare_exchange_strong(size_t I, SubtrieSlotValue &Expected,
+                               SubtrieSlotValue New) {
+    return Slots[I].compare_exchange_strong(Expected.Offset, New.Offset);
+  }
+
+  /// Sink \p V from \p I in this subtrie down to \p NewI in a new subtrie with
+  /// \p NumSubtrieBits.
+  ///
+  /// \p UnusedSubtrie maintains a 1-item "free" list of unused subtries. If a
+  /// new subtrie is created that isn't used because of a lost race, it should
+  /// be returned as an out parameter to be passed back in the future. If it's
+  /// already valid, it should be used instead of allocating a new one.
+  ///
+  /// Returns the subtrie that now lives at \p I.
+  Expected<SubtrieHandle> sink(size_t I, SubtrieSlotValue V,
+                               MappedFileRegionArena &Alloc,
+                               size_t NumSubtrieBits,
+                               SubtrieHandle &UnusedSubtrie, size_t NewI);
+
+  /// Only safe if the subtrie is empty.
+  void reinitialize(uint32_t StartBit, uint32_t NumBits);
+
+  SubtrieSlotValue getOffset() const {
+    return SubtrieSlotValue::getSubtrieOffset(
+        reinterpret_cast<const char *>(H) - Region->data());
+  }
+
+  FileOffset getFileOffset() const { return getOffset().asSubtrieFileOffset(); }
+
+  explicit operator bool() const { return H; }
+
+  Header &getHeader() const { return *H; }
+  uint32_t getStartBit() const { return H->StartBit; }
+  uint32_t getNumBits() const { return H->NumBits; }
+
+  static Expected<SubtrieHandle> create(MappedFileRegionArena &Alloc,
+                                        uint32_t StartBit, uint32_t NumBits);
+
+  static SubtrieHandle getFromFileOffset(MappedFileRegion &Region,
+                                         FileOffset Offset) {
+    return SubtrieHandle(Region, SubtrieSlotValue::getSubtrieOffset(Offset));
+  }
+
+  SubtrieHandle() = default;
+  SubtrieHandle(MappedFileRegion &Region, Header &H)
+      : Region(&Region), H(&H), Slots(getSlots(H)) {}
+  SubtrieHandle(MappedFileRegion &Region, SubtrieSlotValue Offset)
+      : SubtrieHandle(Region, *reinterpret_cast<Header *>(
+                                  Region.data() + Offset.asSubtrie())) {}
+
+private:
+  MappedFileRegion *Region = nullptr;
+  Header *H = nullptr;
+  MutableArrayRef<SlotT> Slots;
+
+  static MutableArrayRef<SlotT> getSlots(Header &H) {
+    return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1), 1u << H.NumBits);
+  }
+};
+
+/// Handle for a TrieRawHashMap table.
+///
+/// TrieRawHashMap table layout:
+/// - [8-bytes: Generic table header]
+/// - 1-byte:  NumSubtrieBits
+/// - 1-byte:  Flags (not used yet)
+/// - 2-bytes: NumHashBits
+/// - 4-bytes: RecordDataSize (in bytes)
+/// - 8-bytes: RootTrieOffset
+/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
+/// - <name> '\0'
+///
+/// Record layout:
+/// - <hash>
+/// - <data>
+class TrieRawHashMapHandle {
+public:
+  static constexpr TableHandle::TableKind Kind =
+      TableHandle::TableKind::TrieRawHashMap;
+
+  struct Header {
+    TableHandle::Header GenericHeader;
+    uint8_t NumSubtrieBits;
+    uint8_t Flags; ///< None used yet.
+    uint16_t NumHashBits;
+    uint32_t RecordDataSize;
+    std::atomic<int64_t> RootTrieOffset;
+    std::atomic<int64_t> AllocatorOffset;
+  };
+
+  operator TableHandle() const {
+    if (!H)
+      return TableHandle();
+    return TableHandle(*Region, H->GenericHeader);
+  }
+
+  struct RecordData {
+    OnDiskTrieRawHashMap::ValueProxy Proxy;
+    SubtrieSlotValue Offset;
+    FileOffset getFileOffset() const { return Offset.asDataFileOffset(); }
+  };
+
+  enum Limits : size_t {
+    /// Seems like 65528 hash bits ought to be enough.
+    MaxNumHashBytes = UINT16_MAX >> 3,
+    MaxNumHashBits = MaxNumHashBytes << 3,
+
+    /// 2^16 bits in a trie is 65536 slots. This restricts us to a 16-bit
+    /// index. This many slots is suspiciously large anyway.
+    MaxNumRootBits = 16,
+
+    /// 2^10 bits in a trie is 1024 slots. This many slots seems suspiciously
+    /// large for subtries.
+ MaxNumSubtrieBits = 10, + }; + + static constexpr size_t getNumHashBytes(size_t NumHashBits) { + assert(NumHashBits % 8 == 0); + return NumHashBits / 8; + } + static constexpr size_t getRecordSize(size_t RecordDataSize, + size_t NumHashBits) { + return RecordDataSize + getNumHashBytes(NumHashBits); + } + + RecordData getRecord(SubtrieSlotValue Offset); + Expected<RecordData> createRecord(MappedFileRegionArena &Alloc, + ArrayRef<uint8_t> Hash); + + explicit operator bool() const { return H; } + const Header &getHeader() const { return *H; } + SubtrieHandle getRoot() const; + Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc); + MappedFileRegion &getRegion() const { return *Region; } + + size_t getFlags() const { return H->Flags; } + size_t getNumSubtrieBits() const { return H->NumSubtrieBits; } + size_t getNumHashBits() const { return H->NumHashBits; } + size_t getNumHashBytes() const { return getNumHashBytes(H->NumHashBits); } + size_t getRecordDataSize() const { return H->RecordDataSize; } + size_t getRecordSize() const { + return getRecordSize(H->RecordDataSize, H->NumHashBits); + } + + TrieHashIndexGenerator getIndexGen(SubtrieHandle Root, + ArrayRef<uint8_t> Hash) { + assert(Root.getStartBit() == 0); + assert(getNumHashBytes() == Hash.size()); + assert(getNumHashBits() == Hash.size() * 8); + return TrieHashIndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash}; + } + + static Expected<TrieRawHashMapHandle> + create(MappedFileRegionArena &Alloc, StringRef Name, + std::optional<uint64_t> NumRootBits, uint64_t NumSubtrieBits, + uint64_t NumHashBits, uint64_t RecordDataSize); + + void + print(raw_ostream &OS, + function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const; + + Error validate( + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier) const; + TrieRawHashMapHandle() = default; + TrieRawHashMapHandle(MappedFileRegion &Region, Header &H) + : Region(&Region), H(&H) {} + TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset) + : TrieRawHashMapHandle( + Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) { + } + +private: + MappedFileRegion *Region = nullptr; + Header *H = nullptr; +}; + +} // end anonymous namespace + +struct OnDiskTrieRawHashMap::ImplType { + DatabaseFile File; + TrieRawHashMapHandle Trie; +}; + +Expected<SubtrieHandle> SubtrieHandle::create(MappedFileRegionArena &Alloc, + uint32_t StartBit, + uint32_t NumBits) { + assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits); + assert(NumBits <= UINT8_MAX); + assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits); + + auto Mem = Alloc.allocate(getSize(NumBits)); + if (LLVM_UNLIKELY(!Mem)) + return Mem.takeError(); + auto *H = + new (*Mem) SubtrieHandle::Header{(uint16_t)StartBit, (uint8_t)NumBits, + /*ZeroPad1B=*/0, /*ZeroPad4B=*/0}; + SubtrieHandle S(Alloc.getRegion(), *H); + for (auto I = S.Slots.begin(), E = S.Slots.end(); I != E; ++I) + new (I) SlotT(0); + return S; +} + +SubtrieHandle TrieRawHashMapHandle::getRoot() const { + if (int64_t Root = H->RootTrieOffset) + return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Root)); + return SubtrieHandle(); +} + +Expected<SubtrieHandle> +TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) { + assert(&Alloc.getRegion() == &getRegion()); + if (SubtrieHandle Root = getRoot()) + return Root; + + int64_t Race = 0; + auto LazyRoot = SubtrieHandle::create(Alloc, 0, H->NumSubtrieBits); + if (LLVM_UNLIKELY(!LazyRoot)) + return LazyRoot.takeError(); 
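+
+  // Try to publish the lazily created root. If another thread won the race,
+  // compare_exchange_strong fails and updates Race to that thread's root
+  // offset.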
+  if (H->RootTrieOffset.compare_exchange_strong(
+          Race, LazyRoot->getOffset().asSubtrie()))
+    return *LazyRoot;
+
+  // There was a race. Return the other root.
+  //
+  // TODO: Avoid leaking the lazy root by storing it in an allocator.
+  return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Race));
+}
+
+Expected<TrieRawHashMapHandle>
+TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
+                             std::optional<uint64_t> NumRootBits,
+                             uint64_t NumSubtrieBits, uint64_t NumHashBits,
+                             uint64_t RecordDataSize) {
+  // Allocate.
+  auto Offset = Alloc.allocateOffset(sizeof(Header) + Name.size() + 1);
+  if (LLVM_UNLIKELY(!Offset))
+    return Offset.takeError();
+
+  // Construct the header and the name.
+  assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
+  assert(NumSubtrieBits <= UINT8_MAX && "Expected valid subtrie bits");
+  assert(NumHashBits <= UINT16_MAX && "Expected valid hash size");
+  assert(RecordDataSize <= UINT32_MAX && "Expected smaller record data size");
+  auto *H = new (Alloc.getRegion().data() + *Offset)
+      Header{{TableHandle::TableKind::TrieRawHashMap, (uint16_t)Name.size(),
+              (uint32_t)sizeof(Header)},
+             (uint8_t)NumSubtrieBits,
+             /*Flags=*/0,
+             (uint16_t)NumHashBits,
+             (uint32_t)RecordDataSize,
+             /*RootTrieOffset=*/{0},
+             /*AllocatorOffset=*/{0}};
+  char *NameStorage = reinterpret_cast<char *>(H + 1);
+  llvm::copy(Name, NameStorage);
+  NameStorage[Name.size()] = 0;
+
+  // Construct a root trie, if requested.
+  TrieRawHashMapHandle Trie(Alloc.getRegion(), *H);
+  if (NumRootBits) {
+    auto Sub = SubtrieHandle::create(Alloc, 0, *NumRootBits);
+    if (LLVM_UNLIKELY(!Sub))
+      return Sub.takeError();
+    H->RootTrieOffset = Sub->getOffset().asSubtrie();
+  }
+  return Trie;
+}
+
+TrieRawHashMapHandle::RecordData
+TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) {
+  char *Begin = Region->data() + Offset.asData();
+  OnDiskTrieRawHashMap::ValueProxy Proxy;
+  Proxy.Data = MutableArrayRef(Begin, getRecordDataSize());
+  Proxy.Hash = ArrayRef(reinterpret_cast<const uint8_t *>(Proxy.Data.end()),
+                        getNumHashBytes());
+  return RecordData{Proxy, Offset};
+}
+
+Expected<TrieRawHashMapHandle::RecordData>
+TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc,
+                                   ArrayRef<uint8_t> Hash) {
+  assert(&Alloc.getRegion() == Region);
+  assert(Hash.size() == getNumHashBytes());
+  auto Offset = Alloc.allocateOffset(getRecordSize());
+  if (LLVM_UNLIKELY(!Offset))
+    return Offset.takeError();
+
+  RecordData Record = getRecord(SubtrieSlotValue::getDataOffset(*Offset));
+  llvm::copy(Hash, const_cast<uint8_t *>(Record.Proxy.Hash.begin()));
+  return Record;
+}
+
+Expected<OnDiskTrieRawHashMap::const_pointer>
+OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
+  // Check alignment.
+  if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get()))
+    return createStringError(make_error_code(std::errc::protocol_error),
+                             "unaligned file offset at 0x" +
+                                 utohexstr(Offset.get(), /*LowerCase=*/true));
+
+  // Check bounds.
+  //
+  // Note: There's no potential overflow when using \c uint64_t because Offset
+  // is in valid offset range and the record size is in \c [0,UINT32_MAX].
+  if (!validOffset(Offset) ||
+      Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size())
+    return createStringError(make_error_code(std::errc::protocol_error),
+                             "file offset too large: 0x" +
+                                 utohexstr(Offset.get(), /*LowerCase=*/true));
+
+  // Looks okay...
+  TrieRawHashMapHandle::RecordData D =
+      Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+  return const_pointer(D.Proxy, D.getFileOffset());
+}
+
+OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
+  TrieRawHashMapHandle Trie = Impl->Trie;
+  assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+  SubtrieHandle S = Trie.getRoot();
+  if (!S)
+    return const_pointer();
+
+  TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash);
+  size_t Index = IndexGen.next();
+  for (;;) {
+    // Load the slot; an empty slot means the hash is not in the map.
+    SubtrieSlotValue V = S.load(Index);
+    if (!V)
+      return const_pointer();
+
+    // Check for an exact match.
+    if (V.isData()) {
+      TrieRawHashMapHandle::RecordData D = Trie.getRecord(V);
+      return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset())
+                                  : const_pointer();
+    }
+
+    Index = IndexGen.next();
+    S = SubtrieHandle(Trie.getRegion(), V);
+  }
+}
+
+/// Only safe if the subtrie is empty.
+void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) {
+  assert(StartBit > H->StartBit);
+  assert(NumBits <= H->NumBits);
+  // Ideally would also assert that all slots are empty, but that's expensive.
+
+  H->StartBit = StartBit;
+  H->NumBits = NumBits;
+}
+
+Expected<OnDiskTrieRawHashMap::pointer>
+OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
+                                 LazyInsertOnConstructCB OnConstruct,
+                                 LazyInsertOnLeakCB OnLeak) {
+  TrieRawHashMapHandle Trie = Impl->Trie;
+  assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+  MappedFileRegionArena &Alloc = Impl->File.getAlloc();
+  std::optional<SubtrieHandle> S;
+  auto Err = Trie.getOrCreateRoot(Alloc).moveInto(S);
+  if (LLVM_UNLIKELY(Err))
+    return std::move(Err);
+
+  TrieHashIndexGenerator IndexGen = Trie.getIndexGen(*S, Hash);
+  size_t Index = IndexGen.next();
+
+  // Walk through the hash bytes and insert into the correct trie position.
+  std::optional<TrieRawHashMapHandle::RecordData> NewRecord;
+  SubtrieHandle UnusedSubtrie;
+  for (;;) {
+    SubtrieSlotValue Existing = S->load(Index);
+
+    // Try to set it, if it's empty.
+    if (!Existing) {
+      if (!NewRecord) {
+        auto Err = Trie.createRecord(Alloc, Hash).moveInto(NewRecord);
+        if (LLVM_UNLIKELY(Err))
+          return std::move(Err);
+        if (OnConstruct)
+          OnConstruct(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy);
+      }
+
+      if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset))
+        return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset());
+
+      // Race means that Existing is no longer empty; fall through...
+    }
+
+    if (Existing.isSubtrie()) {
+      S = SubtrieHandle(Trie.getRegion(), Existing);
+      Index = IndexGen.next();
+      continue;
+    }
+
+    // Check for an exact match.
+    TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Existing);
+    if (ExistingRecord.Proxy.Hash == Hash) {
+      if (NewRecord && OnLeak)
+        OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy,
+               ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy);
+      return pointer(ExistingRecord.Proxy,
+                     ExistingRecord.Offset.asDataFileOffset());
+    }
+
+    // Sink the existing content as long as the indexes match.
+    for (;;) {
+      size_t NextIndex = IndexGen.next();
+      size_t NewIndexForExistingContent =
+          IndexGen.getCollidingBits(ExistingRecord.Proxy.Hash);
+
+      auto Err = S->sink(Index, Existing, Alloc, IndexGen.getNumBits(),
+                         UnusedSubtrie, NewIndexForExistingContent)
+                     .moveInto(S);
+      if (LLVM_UNLIKELY(Err))
+        return std::move(Err);
+      Index = NextIndex;
+
+      // Found the difference.
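+      // Descriptive note: the existing record landed in a different slot of
+      // the new subtrie, so the slot at Index is free and the outer loop can
+      // now claim it.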
+      if (NextIndex != NewIndexForExistingContent)
+        break;
+    }
+  }
+}
+
+Expected<SubtrieHandle> SubtrieHandle::sink(size_t I, SubtrieSlotValue V,
+                                            MappedFileRegionArena &Alloc,
+                                            size_t NumSubtrieBits,
+                                            SubtrieHandle &UnusedSubtrie,
+                                            size_t NewI) {
+  std::optional<SubtrieHandle> NewS;
+  if (UnusedSubtrie) {
+    // Steal UnusedSubtrie and initialize it.
+    NewS.emplace();
+    std::swap(*NewS, UnusedSubtrie);
+    NewS->reinitialize(getStartBit() + getNumBits(), NumSubtrieBits);
+  } else {
+    // Allocate a new, empty subtrie.
+    auto Err = SubtrieHandle::create(Alloc, getStartBit() + getNumBits(),
+                                     NumSubtrieBits)
+                   .moveInto(NewS);
+    if (LLVM_UNLIKELY(Err))
+      return std::move(Err);
+  }
+
+  NewS->store(NewI, V);
+  if (compare_exchange_strong(I, V, NewS->getOffset()))
+    return *NewS; // Success!
+
+  // Raced.
+  assert(V.isSubtrie() && "Expected racing sink() to add a subtrie");
+
+  // Wipe out the new slot so NewS can be reused and set the out parameter.
+  NewS->store(NewI, SubtrieSlotValue());
+  UnusedSubtrie = *NewS;
+
+  // Return the subtrie added by the concurrent sink() call.
+  return SubtrieHandle(Alloc.getRegion(), V);
+}
+
+void OnDiskTrieRawHashMap::print(
+    raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+  Impl->Trie.print(OS, PrintRecordData);
+}
+
+Error OnDiskTrieRawHashMap::validate(
+    function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
+  return Impl->Trie.validate(RecordVerifier);
+}
+
+// Helper function that prints hex digits and supports a sub-byte starting
+// position.
+static void printHexDigits(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
+                           size_t StartBit, size_t NumBits) {
+  assert(StartBit % 4 == 0);
+  assert(NumBits % 4 == 0);
+  for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) {
+    uint8_t HexPair = Bytes[I / 8];
+    uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf;
+    OS << hexdigit(HexDigit, /*LowerCase=*/true);
+  }
+}
+
+static void printBits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, size_t StartBit,
+                      size_t NumBits) {
+  assert(StartBit + NumBits <= Bytes.size() * 8u);
+  for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) {
+    uint8_t Byte = Bytes[I / 8];
+    size_t ByteOffset = I % 8;
+    if (size_t ByteShift = 8 - ByteOffset - 1)
+      Byte >>= ByteShift;
+    OS << (Byte & 0x1 ? '1' : '0');
+  }
+}
+
+void SubtrieHandle::printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const {
+  // afb[1c:00*01110*0]def
+  size_t EndBit = getStartBit() + getNumBits();
+  size_t HashEndBit = Bytes.size() * 8u;
+
+  size_t FirstBinaryBit = getStartBit() & ~0x3u;
+  printHexDigits(OS, Bytes, 0, FirstBinaryBit);
+
+  size_t LastBinaryBit = (EndBit + 3u) & ~0x3u;
+  OS << "[";
+  printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit);
+  OS << "]";
+
+  printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit);
+}
+
+static void appendIndexBits(std::string &Prefix, size_t Index,
+                            size_t NumSlots) {
+  std::string Bits;
+  for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) {
+    Bits.push_back('0' + (Index & 0x1));
+    Index >>= 1;
+  }
+  for (char Ch : llvm::reverse(Bits))
+    Prefix += Ch;
+}
+
+static void printPrefix(raw_ostream &OS, StringRef Prefix) {
+  while (Prefix.size() >= 4) {
+    uint8_t Digit;
+    bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit);
+    assert(!ErrorParsingBinary);
+    (void)ErrorParsingBinary;
+    OS << hexdigit(Digit, /*LowerCase=*/true);
+    Prefix = Prefix.drop_front(4);
+  }
+  if (!Prefix.empty())
+    OS << "[" << Prefix << "]";
+}
+
+LLVM_DUMP_METHOD void OnDiskTrieRawHashMap::dump() const { print(dbgs()); }
+
+static Expected<size_t> checkParameter(StringRef Label, size_t Max,
+                                       std::optional<size_t> Value,
+                                       std::optional<size_t> Default,
+                                       StringRef Path, StringRef TableName) {
+  assert(Value || Default);
+  assert(!Default || *Default <= Max);
+  if (!Value)
+    return *Default;
+
+  if (*Value <= Max)
+    return *Value;
+  return createTableConfigError(
+      std::errc::argument_out_of_domain, Path, TableName,
+      "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")");
+}
+
+size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); }
+size_t OnDiskTrieRawHashMap::capacity() const {
+  return Impl->File.getRegion().size();
+}
+
+Expected<OnDiskTrieRawHashMap>
+OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
+                             size_t NumHashBits, uint64_t DataSize,
+                             uint64_t MaxFileSize,
+                             std::optional<uint64_t> NewFileInitialSize,
+                             std::optional<size_t> NewTableNumRootBits,
+                             std::optional<size_t> NewTableNumSubtrieBits) {
+  SmallString<128> PathStorage;
+  StringRef Path = PathTwine.toStringRef(PathStorage);
+  SmallString<128> TrieNameStorage;
+  StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage);
+
+  constexpr size_t DefaultNumRootBits = 10;
+  constexpr size_t DefaultNumSubtrieBits = 6;
+
+  size_t NumRootBits;
+  if (Error E = checkParameter(
+                    "root bits", TrieRawHashMapHandle::MaxNumRootBits,
+                    NewTableNumRootBits, DefaultNumRootBits, Path, TrieName)
+                    .moveInto(NumRootBits))
+    return std::move(E);
+
+  size_t NumSubtrieBits;
+  if (Error E = checkParameter("subtrie bits",
+                               TrieRawHashMapHandle::MaxNumSubtrieBits,
+                               NewTableNumSubtrieBits, DefaultNumSubtrieBits,
+                               Path, TrieName)
+                    .moveInto(NumSubtrieBits))
+    return std::move(E);
+
+  size_t NumHashBytes = NumHashBits >> 3;
+  if (Error E =
+          checkParameter("hash size", TrieRawHashMapHandle::MaxNumHashBits,
+                         NumHashBits, std::nullopt, Path, TrieName)
+              .takeError())
+    return std::move(E);
+  assert(NumHashBits == NumHashBytes << 3 &&
+         "Expected hash size to be byte-aligned");
+  if (NumHashBits != NumHashBytes << 3)
+    return createTableConfigError(
+        std::errc::argument_out_of_domain, Path, TrieName,
+        "invalid hash size: " + Twine(NumHashBits) + " (not byte-aligned)");
+
+  // Constructor to run if the file doesn't exist.
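+  // It creates the trie table inside the fresh database and registers it
+  // under TrieName, so the findTable() lookup below can locate it.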
+  auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
+    auto Trie =
+        TrieRawHashMapHandle::create(DB.getAlloc(), TrieName, NumRootBits,
+                                     NumSubtrieBits, NumHashBits, DataSize);
+    if (LLVM_UNLIKELY(!Trie))
+      return Trie.takeError();
+
+    return DB.addTable(*Trie);
+  };
+
+  // Get or create the file.
+  Expected<DatabaseFile> File =
+      DatabaseFile::create(Path, MaxFileSize, NewDBConstructor);
+  if (!File)
+    return File.takeError();
+
+  // Find the trie and validate it.
+  std::optional<TableHandle> Table = File->findTable(TrieName);
+  if (!Table)
+    return createTableConfigError(std::errc::argument_out_of_domain, Path,
+                                  TrieName, "table not found");
+  if (Error E = checkTable("table kind", (size_t)TrieRawHashMapHandle::Kind,
+                           (size_t)Table->getHeader().Kind, Path, TrieName))
+    return std::move(E);
+  auto Trie = Table->cast<TrieRawHashMapHandle>();
+  assert(Trie && "Already checked the kind");
+
+  // Check the hash and data size.
+  if (Error E = checkTable("hash size", NumHashBits, Trie.getNumHashBits(),
+                           Path, TrieName))
+    return std::move(E);
+  if (Error E = checkTable("data size", DataSize, Trie.getRecordDataSize(),
+                           Path, TrieName))
+    return std::move(E);
+
+  // No flags supported right now. Either corrupt, or coming from a future
+  // writer.
+  if (size_t Flags = Trie.getFlags())
+    return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+                                  "unsupported flags: " + Twine(Flags));
+
+  // Success.
+  OnDiskTrieRawHashMap::ImplType Impl{DatabaseFile(std::move(*File)), Trie};
+  return OnDiskTrieRawHashMap(std::make_unique<ImplType>(std::move(Impl)));
+}
+
+static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) {
+  return createStringError(make_error_code(std::errc::protocol_error),
+                           "invalid trie at 0x" +
+                               utohexstr(Offset, /*LowerCase=*/true) + ": " +
+                               Msg);
+}
+
+//===----------------------------------------------------------------------===//
+// TrieVisitor data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// A multi-threaded visitor to traverse the Trie.
+///
+/// TODO: Add more sanity checks that aren't just about plain data corruption.
+/// For example, some ill-formed data can be constructed to form a cycle using
+/// sub-tries, which can lead to an infinite loop when visiting (or inserting
+/// data).
+class TrieVisitor {
+public:
+  TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0,
+              unsigned ErrorLimit = 50)
+      : Trie(Trie), ErrorLimit(ErrorLimit),
+        Threads(hardware_concurrency(ThreadCount)) {}
+  virtual ~TrieVisitor() = default;
+  Error visit();
+
+private:
+  // Virtual method to implement the action when visiting a sub-trie.
+  virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) {
+    return Error::success();
+  }
+
+  // Virtual method to implement the action when visiting a slot in a trie
+  // node.
+  virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+                          SubtrieSlotValue Slot) {
+    return Error::success();
+  }
+
+protected:
+  TrieRawHashMapHandle Trie;
+
+private:
+  Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix);
+
+  Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
+
+  // Helper function to capture errors when visiting the trie nodes.
+  void addError(Error NewError) {
+    assert(NewError && "not an error");
+    std::lock_guard<std::mutex> ErrorLock(Lock);
+    if (NumError >= ErrorLimit) {
+      // Too many errors.
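+      // Consume the dropped error; an llvm::Error destroyed without being
+      // handled would assert.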
+      consumeError(std::move(NewError));
+      return;
+    }
+
+    if (Err)
+      Err = joinErrors(std::move(*Err), std::move(NewError));
+    else
+      Err = std::move(NewError);
+    NumError++;
+  }
+
+  bool tooManyErrors() {
+    std::lock_guard<std::mutex> ErrorLock(Lock);
+    return (bool)Err && NumError >= ErrorLimit;
+  }
+
+  const unsigned ErrorLimit;
+  std::optional<Error> Err;
+  unsigned NumError = 0;
+  std::mutex Lock;
+  DefaultThreadPool Threads;
+};
+
+/// A visitor that traverses and prints the Trie.
+class TriePrinter : public TrieVisitor {
+public:
+  TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS,
+              function_ref<void(ArrayRef<char>)> PrintRecordData)
+      : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS),
+        PrintRecordData(PrintRecordData) {}
+
+  Error printRecords() {
+    if (Records.empty())
+      return Error::success();
+
+    OS << "records\n";
+    llvm::sort(Records);
+    for (int64_t Offset : Records) {
+      TrieRawHashMapHandle::RecordData Record =
+          Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+      if (auto Err = printRecord(Record))
+        return Err;
+    }
+    return Error::success();
+  }
+
+  Error printRecord(TrieRawHashMapHandle::RecordData &Record) {
+    OS << "- addr=" << (void *)Record.getFileOffset().get() << " ";
+    if (PrintRecordData) {
+      PrintRecordData(Record.Proxy.Data);
+    } else {
+      OS << "bytes=";
+      ArrayRef<uint8_t> Data(
+          reinterpret_cast<const uint8_t *>(Record.Proxy.Data.data()),
+          Record.Proxy.Data.size());
+      printHexDigits(OS, Data, 0, Data.size() * 8);
+    }
+    OS << "\n";
+    return Error::success();
+  }
+
+  Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) override {
+    if (Prefix.empty()) {
+      OS << "root";
+    } else {
+      OS << "subtrie=";
+      printPrefix(OS, Prefix);
+    }
+
+    OS << " addr="
+       << (void *)(reinterpret_cast<const char *>(&SubTrie.getHeader()) -
+                   Trie.getRegion().data());
+    OS << " num-slots=" << SubTrie.getNumSlots() << "\n";
+    return Error::success();
+  }
+
+  Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+                  SubtrieSlotValue Slot) override {
+    OS << "- index=";
+    for (size_t Pad : {10, 100, 1000})
+      if (I < Pad && Subtrie.getNumSlots() >= Pad)
+        OS << "0";
+    OS << I << " ";
+    if (Slot.isSubtrie()) {
+      OS << "addr=" << (void *)Slot.asSubtrie();
+      OS << " subtrie=";
+      printPrefix(OS, Prefix);
+      OS << "\n";
+      return Error::success();
+    }
+    TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Slot);
+    OS << "addr=" << (void *)Record.getFileOffset().get();
+    OS << " content=";
+    Subtrie.printHash(OS, Record.Proxy.Hash);
+    OS << "\n";
+    Records.push_back(Slot.asData());
+    return Error::success();
+  }
+
+private:
+  raw_ostream &OS;
+  function_ref<void(ArrayRef<char>)> PrintRecordData;
+  SmallVector<int64_t> Records;
+};
+
+/// TrieVerifier that adds additional verification on top of the basic visitor.
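+///
+/// For illustration only: a sketch of a record verifier a caller might pass
+/// to OnDiskTrieRawHashMap::validate(). NumHashBytes stands in for whatever
+/// hash width the caller configured; the check itself is hypothetical.
+/// \code
+///   Error E = Trie.validate(
+///       [&](FileOffset Off,
+///           OnDiskTrieRawHashMap::ConstValueProxy Proxy) -> Error {
+///         // Each record exposes its full hash and its fixed-size data.
+///         if (Proxy.Hash.size() != NumHashBytes)
+///           return createStringError(
+///               make_error_code(std::errc::protocol_error),
+///               "unexpected hash size");
+///         return Error::success();
+///       });
+/// \endcode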
+class TrieVerifier : public TrieVisitor { +public: + TrieVerifier( + TrieRawHashMapHandle Trie, + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier) + : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {} + +private: + Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) final { + return Error::success(); + } + + Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix, + SubtrieSlotValue Slot) final { + if (RecordVerifier && Slot.isData()) { + if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData())) + return createInvalidTrieError(Slot.asData(), "mis-aligned data entry"); + + TrieRawHashMapHandle::RecordData Record = + Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData())); + return RecordVerifier(Slot.asDataFileOffset(), + OnDiskTrieRawHashMap::ConstValueProxy{ + Record.Proxy.Hash, Record.Proxy.Data}); + } + return Error::success(); + } + + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier; +}; +} // namespace + +Error TrieVisitor::visit() { + auto Root = Trie.getRoot(); + if (!Root) + return Error::success(); + + if (auto Err = validateSubTrie(Root, /*IsRoot=*/true)) + return Err; + + if (auto Err = visitSubTrie("", Root)) + return Err; + + SmallVector<SubtrieHandle> Subs; + SmallVector<std::string> Prefixes; + const size_t NumSlots = Root.getNumSlots(); + for (size_t I = 0, E = NumSlots; I != E; ++I) { + SubtrieSlotValue Slot = Root.load(I); + if (!Slot) + continue; + uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData(); + if (Offset >= (uint64_t)Trie.getRegion().size()) + return createInvalidTrieError(Offset, "slot points out of bound"); + std::string SubtriePrefix; + appendIndexBits(SubtriePrefix, I, NumSlots); + if (Slot.isSubtrie()) { + SubtrieHandle S(Trie.getRegion(), Slot); + Subs.push_back(S); + Prefixes.push_back(SubtriePrefix); + } + if (auto Err = visitSlot(I, Root, SubtriePrefix, Slot)) + return Err; + } + + for (size_t I = 0, E = Subs.size(); I != E; ++I) { + Threads.async( + [&](unsigned Idx) { + // Don't run if there is an error already. 
+          if (tooManyErrors())
+            return;
+          if (auto Err = traverseTrieNode(Subs[Idx], Prefixes[Idx]))
+            addError(std::move(Err));
+        },
+        I);
+  }
+
+  Threads.wait();
+  if (Err)
+    return std::move(*Err);
+  return Error::success();
+}
+
+Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
+  char *Addr = reinterpret_cast<char *>(&Node.getHeader());
+  const int64_t Offset = Node.getFileOffset().get();
+  if (Addr + Node.getSize() >=
+      Trie.getRegion().data() + Trie.getRegion().size())
+    return createInvalidTrieError(Offset, "subtrie node spans out of bound");
+
+  if (!IsRoot &&
+      Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) {
+    return createInvalidTrieError(Offset,
+                                  "subtrie represents too many hash bits");
+  }
+
+  if (IsRoot) {
+    if (Node.getStartBit() != 0)
+      return createInvalidTrieError(Offset,
+                                    "root node doesn't start at 0 index");
+
+    return Error::success();
+  }
+
+  if (Node.getNumBits() > Trie.getNumSubtrieBits())
+    return createInvalidTrieError(Offset, "subtrie has wrong number of slots");
+
+  return Error::success();
+}
+
+Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
+  if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
+    return Err;
+
+  if (auto Err = visitSubTrie(Prefix, Node))
+    return Err;
+
+  SmallVector<SubtrieHandle> Subs;
+  SmallVector<std::string> Prefixes;
+  const size_t NumSlots = Node.getNumSlots();
+  for (size_t I = 0, E = NumSlots; I != E; ++I) {
+    SubtrieSlotValue Slot = Node.load(I);
+    if (!Slot)
+      continue;
+    uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
+    if (Offset >= (uint64_t)Trie.getRegion().size())
+      return createInvalidTrieError(Offset, "slot points out of bound");
+    std::string SubtriePrefix = Prefix.str();
+    appendIndexBits(SubtriePrefix, I, NumSlots);
+    if (Slot.isSubtrie()) {
+      SubtrieHandle S(Trie.getRegion(), Slot);
+      Subs.push_back(S);
+      Prefixes.push_back(SubtriePrefix);
+    }
+    if (auto Err = visitSlot(I, Node, SubtriePrefix, Slot))
+      return Err;
+  }
+  for (size_t I = 0, E = Subs.size(); I != E; ++I)
+    if (auto Err = traverseTrieNode(Subs[I], Prefixes[I]))
+      return Err;
+
+  return Error::success();
+}
+
+void TrieRawHashMapHandle::print(
+    raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+  OS << "hash-num-bits=" << getNumHashBits()
+     << " hash-size=" << getNumHashBytes()
+     << " record-data-size=" << getRecordDataSize() << "\n";
+
+  TriePrinter Printer(*this, OS, PrintRecordData);
+  if (auto Err = Printer.visit())
+    OS << "error: " << toString(std::move(Err)) << "\n";
+
+  if (auto Err = Printer.printRecords())
+    OS << "error: " << toString(std::move(Err)) << "\n";
+}
+
+Error TrieRawHashMapHandle::validate(
+    function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+        RecordVerifier) const {
+  // Use the base TrieVisitor to identify structural errors inside the trie
+  // first.
+  TrieVisitor BasicVerifier(*this);
+  if (auto Err = BasicVerifier.visit())
+    return Err;
+
+  // If the trie data structure is sound, do a second pass to verify the data;
+  // the verifier function can then assume the indexes are correct. However,
+  // entries added concurrently between the passes can still produce errors.
+ TrieVerifier Verifier(*this, RecordVerifier); + return Verifier.visit(); +} + +#else // !LLVM_ENABLE_ONDISK_CAS + +struct OnDiskTrieRawHashMap::ImplType {}; + +Expected<OnDiskTrieRawHashMap> +OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine, + size_t NumHashBits, uint64_t DataSize, + uint64_t MaxFileSize, + std::optional<uint64_t> NewFileInitialSize, + std::optional<size_t> NewTableNumRootBits, + std::optional<size_t> NewTableNumSubtrieBits) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +Expected<OnDiskTrieRawHashMap::pointer> +OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash, + LazyInsertOnConstructCB OnConstruct, + LazyInsertOnLeakCB OnLeak) { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +Expected<OnDiskTrieRawHashMap::const_pointer> +OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +OnDiskTrieRawHashMap::const_pointer +OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const { + return const_pointer(); +} + +void OnDiskTrieRawHashMap::print( + raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const { +} + +Error OnDiskTrieRawHashMap::validate( + function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)> + RecordVerifier) const { + return createStringError(make_error_code(std::errc::not_supported), + "OnDiskTrieRawHashMap is not supported"); +} + +size_t OnDiskTrieRawHashMap::size() const { return 0; } +size_t OnDiskTrieRawHashMap::capacity() const { return 0; } + +#endif // LLVM_ENABLE_ONDISK_CAS + +OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl) + : Impl(std::move(Impl)) {} +OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS) = + default; +OnDiskTrieRawHashMap & +OnDiskTrieRawHashMap::operator=(OnDiskTrieRawHashMap &&RHS) = default; +OnDiskTrieRawHashMap::~OnDiskTrieRawHashMap() = default; diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 5827292..99029c1 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -252,6 +252,13 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) { MDB.createString(PassName)})); } +void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F, + StringRef PassName) { + if (std::optional<Function::ProfileCount> EC = F.getEntryCount(); + EC && EC->getCount() > 0) + setExplicitlyUnknownBranchWeights(I, PassName); +} + void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) { MDBuilder MDB(F.getContext()); F.setMetadata( diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp index 1e1042c..0dd378e 100644 --- a/llvm/lib/Object/OffloadBundle.cpp +++ b/llvm/lib/Object/OffloadBundle.cpp @@ -89,17 +89,17 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer, uint64_t EntryIDSize; StringRef EntryID; - if (auto EC = Reader.readInteger(EntryOffset)) - return errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readInteger(EntryOffset)) + return Err; - if (auto EC = Reader.readInteger(EntrySize)) - return errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readInteger(EntrySize)) + return Err; - if (auto EC = Reader.readInteger(EntryIDSize)) - return 
errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readInteger(EntryIDSize)) + return Err; - if (auto EC = Reader.readFixedString(EntryID, EntryIDSize)) - return errorCodeToError(object_error::parse_failed); + if (Error Err = Reader.readFixedString(EntryID, EntryIDSize)) + return Err; auto Entry = std::make_unique<OffloadBundleEntry>( EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID); @@ -125,7 +125,7 @@ OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset, // Read the Bundle Entries Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset); if (Err) - return errorCodeToError(object_error::parse_failed); + return Err; return std::unique_ptr<OffloadBundleFatBin>(TheBundle); } diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp index 686688a..8da6fdb 100644 --- a/llvm/lib/Support/Mustache.cpp +++ b/llvm/lib/Support/Mustache.cpp @@ -282,18 +282,15 @@ void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) { // For example: // The template string // " \t{{#section}}A{{/section}}" -// would be considered as having no text ahead and would be render as +// would be considered as having no text ahead and would be render as: // "A" -// The exception for this is partial tag which requires us to -// keep track of the indentation once it's rendered. void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx, Token &CurrentToken, Token::Type CurrentType) { Token &PrevToken = Tokens[Idx - 1]; StringRef PrevTokenBody = PrevToken.TokenBody; StringRef Unindented = PrevTokenBody.rtrim(" \r\t\v"); size_t Indentation = PrevTokenBody.size() - Unindented.size(); - if (CurrentType != Token::Type::Partial) - PrevToken.TokenBody = Unindented.str(); + PrevToken.TokenBody = Unindented.str(); CurrentToken.setIndentation(Indentation); } @@ -439,7 +436,8 @@ class AddIndentationStringStream : public raw_ostream { public: explicit AddIndentationStringStream(llvm::raw_ostream &WrappedStream, size_t Indentation) - : Indentation(Indentation), WrappedStream(WrappedStream) { + : Indentation(Indentation), WrappedStream(WrappedStream), + NeedsIndent(true) { SetUnbuffered(); } @@ -448,10 +446,15 @@ protected: llvm::StringRef Data(Ptr, Size); SmallString<0> Indent; Indent.resize(Indentation, ' '); + for (char C : Data) { + if (NeedsIndent && C != '\n') { + WrappedStream << Indent; + NeedsIndent = false; + } WrappedStream << C; if (C == '\n') - WrappedStream << Indent; + NeedsIndent = true; } } @@ -460,6 +463,7 @@ protected: private: size_t Indentation; llvm::raw_ostream &WrappedStream; + bool NeedsIndent; }; class Parser { diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index cb57c43..d4d9e54 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -193,7 +193,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB, // we need to invert the branch condition to jump over TrueBB when the // condition is false. auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); - CC = RISCVCC::getOppositeBranchCondition(CC); + CC = RISCVCC::getInverseBranchCondition(CC); // Insert branch instruction. 
BuildMI(MBB, MBBI, DL, TII->get(RISCVCC::getBrCond(CC))) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 56db09a..6d418fd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1134,7 +1134,7 @@ unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) { } } -RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) { +RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) { switch (CC) { default: llvm_unreachable("Unrecognized conditional branch"); @@ -1554,7 +1554,7 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const { return Register(); }; - unsigned NewOpc = RISCVCC::getBrCond(getOppositeBranchCondition(CC)); + unsigned NewOpc = RISCVCC::getBrCond(getInverseBranchCondition(CC)); // Might be case 1. // Don't change 0 to 1 since we can use x0. @@ -1801,7 +1801,7 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI, // Add condition code, inverting if necessary. auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); if (Invert) - CC = RISCVCC::getOppositeBranchCondition(CC); + CC = RISCVCC::getInverseBranchCondition(CC); NewMI.addImm(CC); // Copy the false register. @@ -3978,7 +3978,7 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, case RISCV::PseudoCCMOVGPR: { // CCMOV can be commuted by inverting the condition. auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); - CC = RISCVCC::getOppositeBranchCondition(CC); + CC = RISCVCC::getInverseBranchCondition(CC); auto &WorkingMI = cloneIfNew(MI); WorkingMI.getOperand(3).setImm(CC); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 2bc499b..42a0c4c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -44,7 +44,7 @@ enum CondCode { COND_INVALID }; -CondCode getOppositeBranchCondition(CondCode); +CondCode getInverseBranchCondition(CondCode); unsigned getBrCond(CondCode CC, unsigned SelectOpc = 0); } // end of namespace RISCVCC diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 292eab7..cd04ff5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -45169,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( case X86ISD::Wrapper: case X86ISD::WrapperRIP: return true; + case X86ISD::INSERTPS: case X86ISD::BLENDI: case X86ISD::PSHUFB: case X86ISD::PSHUFD: @@ -45239,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode( case X86ISD::BLENDV: return false; // SSE target shuffles. + case X86ISD::INSERTPS: case X86ISD::PSHUFB: case X86ISD::PSHUFD: case X86ISD::UNPCKL: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d1ca0a6..59e103cd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -880,11 +880,11 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) { // zext(bool) + C -> bool ? C + 1 : C if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1); + return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1); // sext(bool) + C -> bool ? 
C - 1 : C if (match(Op0, m_SExt(m_Value(X))) && X->getType()->getScalarSizeInBits() == 1) - return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1); + return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1); // ~X + C --> (C-1) - X if (match(Op0, m_Not(m_Value(X)))) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 7a979c1..4f94aa2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -23,6 +23,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" @@ -62,14 +63,14 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final public InstVisitor<InstCombinerImpl, Instruction *> { public: InstCombinerImpl(InstructionWorklist &Worklist, BuilderTy &Builder, - bool MinimizeSize, AAResults *AA, AssumptionCache &AC, + Function &F, AAResults *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const DataLayout &DL, ReversePostOrderTraversal<BasicBlock *> &RPOT) - : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE, - BFI, BPI, PSI, DL, RPOT) {} + : InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI, + PSI, DL, RPOT) {} virtual ~InstCombinerImpl() = default; @@ -469,6 +470,17 @@ private: Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable, unsigned Depth = 0); + SelectInst *createSelectInst(Value *C, Value *S1, Value *S2, + const Twine &NameStr = "", + InsertPosition InsertBefore = nullptr, + Instruction *MDFrom = nullptr) { + SelectInst *SI = + SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom); + if (!MDFrom) + setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE); + return SI; + } + public: /// Create and insert the idiom we use to indicate a block is unreachable /// without having to rewrite the CFG from within InstCombine. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 550f095..d457e0c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1253,7 +1253,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { // shl (zext i1 X), C1 --> select (X, 1 << C1, 0) if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1); - return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty)); + return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty)); } } diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index f0ddd5c..8fbaf68 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1735,7 +1735,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) { Constant *Zero = ConstantInt::getNullValue(BO.getType()); Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C); Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C); - return SelectInst::Create(X, TVal, FVal); + return createSelectInst(X, TVal, FVal); } static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI, @@ -5934,8 +5934,8 @@ static bool combineInstructionsOverFunction( LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); - InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT, - ORE, BFI, BPI, PSI, DL, RPOT); + InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, + BPI, PSI, DL, RPOT); IC.MaxArraySizeForCombine = MaxArraySize; bool MadeChangeInThisIteration = IC.prepareWorklist(F); MadeChangeInThisIteration |= IC.run(); diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index e5bf2d1..d842275 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -35,6 +35,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation/CFGMST.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" @@ -92,8 +93,10 @@ class GCOVFunction; class GCOVProfiler { public: - GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {} - GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {} + GCOVProfiler() + : GCOVProfiler(GCOVOptions::getDefault(), *vfs::getRealFileSystem()) {} + GCOVProfiler(const GCOVOptions &Opts, vfs::FileSystem &VFS) + : Options(Opts), VFS(VFS) {} bool runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI, function_ref<BranchProbabilityInfo *(Function &F)> GetBPI, @@ -110,6 +113,7 @@ public: os->write_zeros(4 - s.size() % 4); } void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); } + vfs::FileSystem &getVirtualFileSystem() const { return VFS; } private: // Create the .gcno files for the Module based on DebugInfo. 
@@ -166,6 +170,7 @@ private: std::vector<Regex> ExcludeRe; DenseSet<const BasicBlock *> ExecBlocks; StringMap<bool> InstrumentedFiles; + vfs::FileSystem &VFS; }; struct BBInfo { @@ -214,10 +219,10 @@ static StringRef getFunctionName(const DISubprogram *SP) { /// Prefer relative paths in the coverage notes. Clang also may split /// up absolute paths into a directory and filename component. When /// the relative path doesn't exist, reconstruct the absolute path. -static SmallString<128> getFilename(const DIScope *SP) { +static SmallString<128> getFilename(const DIScope *SP, vfs::FileSystem &VFS) { SmallString<128> Path; StringRef RelPath = SP->getFilename(); - if (sys::fs::exists(RelPath)) + if (VFS.exists(RelPath)) Path = RelPath; else sys::path::append(Path, SP->getDirectory(), SP->getFilename()); @@ -357,7 +362,7 @@ namespace { void writeOut(uint32_t CfgChecksum) { write(GCOV_TAG_FUNCTION); - SmallString<128> Filename = getFilename(SP); + SmallString<128> Filename = getFilename(SP, P->getVirtualFileSystem()); uint32_t BlockLen = 3 + wordsOfString(getFunctionName(SP)); BlockLen += 1 + wordsOfString(Filename) + 4; @@ -455,7 +460,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) { if (FilterRe.empty() && ExcludeRe.empty()) { return true; } - SmallString<128> Filename = getFilename(F.getSubprogram()); + SmallString<128> Filename = getFilename(F.getSubprogram(), VFS); auto It = InstrumentedFiles.find(Filename); if (It != InstrumentedFiles.end()) { return It->second; @@ -467,7 +472,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) { // Path can be // /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for // such a case we must get the real_path. - if (sys::fs::real_path(Filename, RealPath)) { + if (VFS.getRealPath(Filename, RealPath)) { // real_path can fail with path like "foo.c". RealFilename = Filename; } else { @@ -524,9 +529,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU, SmallString<128> Filename = CU->getFilename(); sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda"); StringRef FName = sys::path::filename(Filename); - SmallString<128> CurPath; - if (sys::fs::current_path(CurPath)) + ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory(); + if (!CWD) return std::string(FName); + SmallString<128> CurPath{*CWD}; sys::path::append(CurPath, FName); return std::string(CurPath); } @@ -554,7 +560,7 @@ bool GCOVProfiler::runOnModule( PreservedAnalyses GCOVProfilerPass::run(Module &M, ModuleAnalysisManager &AM) { - GCOVProfiler Profiler(GCOVOpts); + GCOVProfiler Profiler(GCOVOpts, *VFS); FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); @@ -789,7 +795,7 @@ bool GCOVProfiler::emitProfileNotes( // Add the function line number to the lines of the entry block // to have a counter for the function definition. 
uint32_t Line = SP->getLine(); - auto Filename = getFilename(SP); + auto Filename = getFilename(SP, VFS); BranchProbabilityInfo *BPI = GetBPI(F); BlockFrequencyInfo *BFI = GetBFI(F); @@ -881,7 +887,7 @@ bool GCOVProfiler::emitProfileNotes( if (SP != getDISubprogram(Scope)) continue; - GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope())); + GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope(), VFS)); Lines.addLine(Loc.getLine()); } Line = 0; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 065622e..c547662 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1100,7 +1100,9 @@ class BinOpSameOpcodeHelper { // constant + x cannot be -constant - x // instead, it should be x - -constant if (Pos == 1 || - (FromOpcode == Instruction::Add && ToOpcode == Instruction::Sub)) + ((FromOpcode == Instruction::Add || FromOpcode == Instruction::Or || + FromOpcode == Instruction::Xor) && + ToOpcode == Instruction::Sub)) return SmallVector<Value *>({LHS, RHS}); return SmallVector<Value *>({RHS, LHS}); } @@ -1188,6 +1190,10 @@ public: if (CIValue.isAllOnes()) InterchangeableMask = CanBeAll; break; + case Instruction::Xor: + if (CIValue.isZero()) + InterchangeableMask = XorBIT | OrBIT | AndBIT | SubBIT | AddBIT; + break; default: if (CIValue.isZero()) InterchangeableMask = CanBeAll; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll index bec3349..3590c4d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll @@ -62,15 +62,12 @@ define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1) define <4 x float> @freeze_insertps(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: freeze_insertps: ; SSE: # %bb.0: -; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; SSE-NEXT: insertps {{.*#+}} xmm1 = xmm0[1],xmm1[1,2,3] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: freeze_insertps: ; AVX: # %bb.0: -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],xmm1[1,2,3] +; AVX-NEXT: vmovaps %xmm1, %xmm0 ; AVX-NEXT: retq %s0 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 16) %f0 = freeze <4 x float> %s0 diff --git a/llvm/test/Transforms/InstCombine/preserve-profile.ll b/llvm/test/Transforms/InstCombine/preserve-profile.ll index dd83805..8cb3e68 100644 --- a/llvm/test/Transforms/InstCombine/preserve-profile.ll +++ b/llvm/test/Transforms/InstCombine/preserve-profile.ll @@ -46,9 +46,59 @@ define i32 @NegBin(i1 %C) !prof !0 { ret i32 %V } +define i32 @select_C_minus_1_or_C_from_bool(i1 %x) !prof !0 { +; CHECK-LABEL: define i32 @select_C_minus_1_or_C_from_bool( +; CHECK-SAME: i1 [[X:%.*]]) !prof [[PROF0]] { +; CHECK-NEXT: [[ADD:%.*]] = select i1 [[X]], i32 41, i32 42, !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %ext = sext i1 %x to i32 + %add = add i32 %ext, 42 + ret i32 %add +} + +define i5 @and_add(i1 %x, i1 %y) !prof !0 { +; CHECK-LABEL: define i5 @and_add( +; CHECK-SAME: i1 [[X:%.*]], i1 [[Y:%.*]]) !prof [[PROF0]] { +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X]], true +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[Y]], [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i5 -2, i5 0, !prof [[PROF2]] +; CHECK-NEXT: ret i5 [[R]] +; + %xz = zext i1 %x to i5 + %ys = sext i1 %y to i5 + 
%sub = add i5 %xz, %ys + %r = and i5 %sub, 30 + ret i5 %r +} + +define i32 @add_zext_zext_i1(i1 %a) !prof !0 { +; CHECK-LABEL: define i32 @add_zext_zext_i1( +; CHECK-SAME: i1 [[A:%.*]]) !prof [[PROF0]] { +; CHECK-NEXT: [[ADD:%.*]] = select i1 [[A]], i32 2, i32 0, !prof [[PROF2]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %zext = zext i1 %a to i32 + %add = add i32 %zext, %zext + ret i32 %add +} + +define i32 @no_count_no_branch_weights(i1 %a) { +; CHECK-LABEL: define i32 @no_count_no_branch_weights( +; CHECK-SAME: i1 [[A:%.*]]) { +; CHECK-NEXT: [[ADD:%.*]] = select i1 [[A]], i32 2, i32 0 +; CHECK-NEXT: ret i32 [[ADD]] +; + %zext = zext i1 %a to i32 + %add = add i32 %zext, %zext + ret i32 %add +} + + !0 = !{!"function_entry_count", i64 1000} !1 = !{!"branch_weights", i32 2, i32 3} ;. ; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +; CHECK: [[PROF2]] = !{!"unknown", !"instcombine"} ;. diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 8e9cb23..75420d4 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 ; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s declare void @llvm.assume(i1) @@ -47,29 +47,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop.header @@ -123,27 +102,8 @@ define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant ; 
CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ] @@ -216,29 +176,8 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 2) ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop.header @@ -312,29 +251,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; 
CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop.header @@ -408,29 +326,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop.header @@ -504,29 +401,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
%[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop.header @@ -596,29 +472,8 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop.header @@ -692,29 +547,8 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds 
i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: br label %loop.header @@ -747,7 +581,7 @@ exit: define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %N) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_variable_trip_count( ; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -792,30 +626,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef ; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void ; entry: br label %loop.header @@ 
-867,28 +679,8 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -958,28 +750,8 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ] @@ -1031,28 +803,8 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1105,28 +857,8 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -1196,28 +928,8 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, 
%[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ] @@ -1287,28 +999,8 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ] @@ -1376,27 +1068,8 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ] @@ -1465,27 +1138,8 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: %a = call ptr @get_ptr() @@ -1530,10 +1184,10 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] @@ -1542,7 +1196,7 @@ define void 
@deref_assumption_in_header_constant_trip_count_nofree_via_context(p ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_IF1]]: ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 @@ -1551,36 +1205,16 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP13]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1621,19 +1255,14 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> 
[[VEC_IND]] -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4), "dereferenceable"(ptr [[TMP1]], i64 4) ] -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP2]], i64 4), "dereferenceable"(ptr [[TMP2]], i64 4) ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] @@ -1642,7 +1271,8 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP18]] ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] @@ -1652,33 +1282,11 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH:.*]]: -; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] -; CHECK: [[LOOP_HEADER]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] -; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ] -; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] -; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4 -; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0 -; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]] -; CHECK: [[LOOP_THEN]]: -; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 -; CHECK-NEXT: br label %[[LOOP_LATCH]] -; CHECK: [[LOOP_LATCH]]: -; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ] -; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] -; CHECK-NEXT: store i32 [[MERGE]], ptr 
[[GEP_C]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] @@ -1688,7 +1296,6 @@ entry: loop.header: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] %gep.a = getelementptr i32, ptr %a, i64 %iv - call void @llvm.assume(i1 true) [ "align"(ptr %gep.a, i64 4), "dereferenceable"(ptr %gep.a, i64 4) ] %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv %l.b = load i32, ptr %gep.b, align 4 %c.1 = icmp sge i32 %l.b, 0 @@ -1709,27 +1316,163 @@ loop.latch: exit: ret void } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} -; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} -; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]} -; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]} -; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]], [[META2]]} -; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]} -; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} -; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]} -; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} -; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]} -; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} -;. 
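+; Note: the function below deliberately lacks the nosync attribute, so the
+; dereferenceable assumption on %a cannot be applied across the loop and the
+; conditional loads from %a remain predicated in the vector body (see the
+; CHECK lines below).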
+ +define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_but_missing_nosync(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) { +; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_but_missing_nosync( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP13]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, 
ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predecessors(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i1 %pre) nosync { +; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predecessors( +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i1 [[PRE:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] +; CHECK-NEXT: br i1 [[PRE]], label %[[THEN:.*]], label %[[ELSE:.*]] +; CHECK: [[THEN]]: +; CHECK-NEXT: store i32 0, ptr [[A]], align 4 +; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER:.*]] +; CHECK: [[ELSE]]: +; CHECK-NEXT: store i32 0, ptr [[B]], align 4 +; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER]] +; CHECK: [[LOOP_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP13]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH:.*:]] +; +entry: + call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ] + br i1 %pre, label 
%then, label %else +then: + store i32 0, ptr %a + br label %loop.header + +else: + store i32 0, ptr %b + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %loop.latch ] + %gep.a = getelementptr i32, ptr %a, i64 %iv + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %l.b = load i32, ptr %gep.b, align 4 + %c.1 = icmp sge i32 %l.b, 0 + br i1 %c.1, label %loop.latch, label %loop.then + +loop.then: + %l.a = load i32, ptr %gep.a, align 4 + br label %loop.latch + +loop.latch: + %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ] + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %merge, ptr %gep.c, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 1000 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + + diff --git a/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll b/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll index 7664fda..9cdcdf1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll @@ -6,7 +6,7 @@ define i1 @foo(i1 %v) { ; assume %v is 1 ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> poison, i1 [[V]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[TMP0]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i1> <i1 false, i1 true>, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: [[SUB:%.*]] = sub i1 [[TMP3]], [[TMP4]] diff --git a/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml b/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml index 5312c25..17e91f1 100644 --- a/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml +++ b/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml @@ -2,9 +2,9 @@ # Then manually tampered with some of the values of the attributes # I hope there are easier ways to construct tests like this. -# RUN: yaml2obj %s -o verify_stmt_seq.o -# RUN: not llvm-dwarfdump -verify -debug-info verify_stmt_seq.o | FileCheck %s --check-prefix=CHECK_INVALID --implicit-check-not=error: -# RUN: llvm-dwarfdump -debug-line -verbose -debug-info verify_stmt_seq.o | FileCheck %s --check-prefix=CHECK_DEBUG_LINE +# RUN: yaml2obj %s -o %t.o +# RUN: not llvm-dwarfdump -verify -debug-info %t.o | FileCheck %s --check-prefix=CHECK_INVALID --implicit-check-not=error: +# RUN: llvm-dwarfdump -debug-line -verbose -debug-info %t.o | FileCheck %s --check-prefix=CHECK_DEBUG_LINE # CHECK_INVALID: error: DW_AT_LLVM_stmt_sequence offset 0x00000000 is not within the line table bounds [0x00000034, 0x000000fd) # CHECK_INVALID: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000000) diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading-fail.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading-fail.test new file mode 100644 index 0000000..391b7ee --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading-fail.test @@ -0,0 +1,26 @@ +## Test that --offloading emits a warning when the fatbin is malformed. +# REQUIRES: amdgpu-registered-target + +# RUN: yaml2obj %s -o %t.elf +# RUN: llvm-readobj --offloading %t.elf 2>&1 | \ +# RUN: FileCheck %s --check-prefix=WARN -DFILE_NAME=%t.elf + +# WARN: warning: '{{.*}}': Stream Error: The stream is too short to perform the requested operation.
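+## The .hip_fatbin payload below is intentionally malformed: reading the
+## offload bundle entries runs past the end of the section data, which should
+## produce the stream-too-short warning checked above.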
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .hip_fatbin + Type: SHT_PROGBITS + AddressAlign: 0x1000 + Content: 5F5F434C414E475F4F46464C4F41445F42554E444C455F5F0200000000000000001000000000000000000000000000001B0000000000000075782D2D0010000000000000D00F0000000000001F0000000000000068697076342D616D6467636E2D616D642D616D646873612D2D676678393038000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007F454C460201014003000000000000000300E0000100000000000000000000004000000000000000100C0000000000003005000040003800090040000F000D000600000004000000400000000000000040000000000000004000000000000000F801000000000000F80100000000000008000000000000000100000004000000000000000000000000000000000000000000000000000000C008000000000000C008000000000000001000000000000001000000050000000009000000000000001900000000000000190000000000006C000000000000006C00000000000000001000000000000001000000060000007009000000000000702900000000000070290000000000007000000000000000900600000000000000100000000000000100000006000000E009000000000000E039000000000000E039000000000000000000000000000001000000000000000010000000000000020000000600000070090000000000007029000000000000702900000000000070000000000000007000000000000000080000000000000052E574640400000070090000000000007029000000000000702900000000000070000000000000009006000000000000010000000000000051E57464060000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000004000000380200000000000038020000000000003802000000000000340500000000000034050000000000000400000000000000070000001D05000020000000414D44475055000083AE616D646873612E6B65726E656C7391DE0012AB2E616770725F636F756E7400A52E61726773DC001085AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA415F642E636F65726365A72E6F666673657400A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657285AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA425F642E636F65726365A72E6F666673657408A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657284A52E6E616D65A14EA72E6F666673657410A52E73697A6508AB2E76616C75655F6B696E64A862795F76616C756583A72E6F666673657418A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7883A72E6F66667365741CA52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7983A72E6F666673657420A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7A83A72E6F666673657424A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7883A72E6F666673657426A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7983A72E6F666673657428A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7A83A72E6F66667365742AA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72
656D61696E6465725F7883A72E6F66667365742CA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7983A72E6F66667365742EA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7A83A72E6F666673657440A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7883A72E6F666673657448A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7983A72E6F666673657450A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7A83A72E6F666673657458A52E73697A6502AB2E76616C75655F6B696E64B068696464656E5F677269645F64696D73B92E67726F75705F7365676D656E745F66697865645F73697A6500B62E6B65726E6172675F7365676D656E745F616C69676E08B52E6B65726E6172675F7365676D656E745F73697A65CD0118A92E6C616E6775616765A84F70656E434C2043B12E6C616E67756167655F76657273696F6E920200B82E6D61785F666C61745F776F726B67726F75705F73697A65CD0400A52E6E616D65B25F5A3973696D706C65416464506A504B6A6DBB2E707269766174655F7365676D656E745F66697865645F73697A6500AB2E736770725F636F756E740CB12E736770725F7370696C6C5F636F756E7400A72E73796D626F6CB55F5A3973696D706C65416464506A504B6A6D2E6B64B82E756E69666F726D5F776F726B5F67726F75705F73697A6501B32E757365735F64796E616D69635F737461636BC2AB2E766770725F636F756E7404B12E766770725F7370696C6C5F636F756E7400AF2E7761766566726F6E745F73697A6540AD616D646873612E746172676574B9616D6467636E2D616D642D616D646873612D2D676678393038AE616D646873612E76657273696F6E92010200000000000000000000000000000000000000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E03900000000000001000000000000000100000001000000010000001A000000000008400000D20001000000360A4A7A5238A4D3F113F4DD04000000040000000200000001000000000000000300000000000000000000000000000000000000005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F623730363264386333326134613933330000000000000000000000000000000000000000000000000000000000000000000000180100000000000080100000000000000000000000000000000000000000000000000000000000004000AF008C000000090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C20102C02400000002000AC0000000008002027E7FC08CBF07FF0486FFFF0000060406920600006800008FD2820002000302067E0200043203030638008050DC02007F020102067E0000003203030238008050DC00007F03700F8CBF03050468008070DC00027F00000081BF00000000060000000000000070070000000000000B000000000000001800000000000000050000000000000020080000000000000A000000000000004600000000000000F5FEFF6F00000000D0070000000000000400000000000000F807000000000000000000000000000000000000000000004C696E6B65723A20414D44204C4C442031392E302E3000414D4420636C616E672076657273696F6E2031392E302E306769742028202032343231322063393630313665636534313337356462646438663037356266333762643666633333323230376233290000414D4420636C616E672076657273696F6E2031382E302E3067697420287373683A2F2F6765727269746769742F6C696768746E696E672F65632F6C6C766D2D70726F6A65637420616D642D6D61696E6C696E652D6F70656E20323431373620663935303039613166393032313232343865313036333964653837653635636163616338643961372900000000000000000000000000000000000000000000000000460000000002080070290000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E0390000000000000100000000000000002E6E6F7465002E64796E73796D002E676E752E68617368002E68617368002E64796E737472002E726F64617461002E74657874002E64796E616D6963002E726
56C726F5F70616464696E67002E627373002E636F6D6D656E74002E73796D746162002E7368737472746162002E73747274616200005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F62373036326438633332613461393333005F44594E414D494300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000070000000200000000000000380200000000000038020000000000003405000000000000000000000000000004000000000000000000000000000000070000000B00000002000000000000007007000000000000700700000000000060000000000000000500000001000000080000000000000018000000000000000F000000F6FFFF6F0200000000000000D007000000000000D007000000000000280000000000000002000000000000000800000000000000000000000000000019000000050000000200000000000000F807000000000000F80700000000000028000000000000000200000000000000040000000000000004000000000000001F000000030000000200000000000000200800000000000020080000000000004600000000000000000000000000000001000000000000000000000000000000270000000100000002000000000000008008000000000000800800000000000040000000000000000000000000000000400000000000000000000000000000002F000000010000000600000000000000001900000000000000090000000000006C00000000000000000000000000000000010000000000000000000000000000350000000600000003000000000000007029000000000000700900000000000070000000000000000500000000000000080000000000000010000000000000003E000000080000000300000000000000E029000000000000E00900000000000020060000000000000000000000000000010000000000000000000000000000004D000000080000000300000000000000E039000000000000E0090000000000000100000000000000000000000000000001000000000000000000000000000000520000000100000030000000000000000000000000000000E009000000000000F0000000000000000000000000000000010000000000000001000000000000005B0000000200000000000000000000000000000000000000D00A00000000000078000000000000000E0000000200000008000000000000001800000000000000630000000300000000000000000000000000000000000000480B00000000000075000000000000000000000000000000010000000000000000000000000000006D0000000300000000000000000000000000000000000000BD0B0000000000004F00000000000000000000000000000001000000000000000000000000000000 + - Name: .hipFatBinSegment + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x202FD0 + AddressAlign: 0x8 + Content: '465049480100000000102000000000000000000000000000' +... diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading.test new file mode 100644 index 0000000..21ee60d --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading.test @@ -0,0 +1,27 @@ +## Test that --offloading with a fatbin works correctly. 
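+## Each offload bundle entry is reported as a file:// URI pointing at the
+## embedding file, with the offset and size of the image inside the fatbin,
+## as checked below.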
+# REQUIRES: amdgpu-registered-target + +# RUN: yaml2obj %s -o %t.elf +# RUN: llvm-readobj --offloading %t.elf | \ +# RUN: FileCheck %s -DFILE_NAME=%t.elf + +# CHECK: host-x86_64-unknown-linux-- file://[[FILE_NAME]]#offset=8192&size=0 +# CHECK-NEXT: hipv4-amdgcn-amd-amdhsa--gfx908 file://[[FILE_NAME]]#offset=8192&size=4048 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .hip_fatbin + Type: SHT_PROGBITS + AddressAlign: 0x1000 + Content: 5F5F434C414E475F4F46464C4F41445F42554E444C455F5F0200000000000000001000000000000000000000000000001B00000000000000686F73742D7838365F36342D756E6B6E6F776E2D6C696E75782D2D0010000000000000D00F0000000000001F0000000000000068697076342D616D6467636E2D616D642D616D646873612D2D6766783930380000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007F454C460201014003000000000000000300E0000100000000000000000000004000000000000000100C0000000000003005000040003800090040000F000D000600000004000000400000000000000040000000000000004000000000000000F801000000000000F80100000000000008000000000000000100000004000000000000000000000000000000000000000000000000000000C008000000000000C008000000000000001000000000000001000000050000000009000000000000001900000000000000190000000000006C000000000000006C00000000000000001000000000000001000000060000007009000000000000702900000000000070290000000000007000000000000000900600000000000000100000000000000100000006000000E009000000000000E039000000000000E039000000000000000000000000000001000000000000000010000000000000020000000600000070090000000000007029000000000000702900000000000070000000000000007000000000000000080000000000000052E574640400000070090000000000007029000000000000702900000000000070000000000000009006000000000000010000000000000051E57464060000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000004000000380200000000000038020000000000003802000000000000340500000000000034050000000000000400000000000000070000001D05000020000000414D44475055000083AE616D646873612E6B65726E656C7391DE0012AB2E616770725F636F756E7400A52E61726773DC001085AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA415F642E636F65726365A72E6F666673657400A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657285AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA425F642E636F65726365A72E6F666673657408A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657284A52E6E616D65A14EA72E6F666673657410A52E73697A6508AB2E76616C75655F6B696E64A862795F76616C756583A72E6F666673657418A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7883A72E6F66667365741CA52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7983A72E6F666673657420A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7
A83A72E6F666673657424A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7883A72E6F666673657426A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7983A72E6F666673657428A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7A83A72E6F66667365742AA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7883A72E6F66667365742CA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7983A72E6F66667365742EA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7A83A72E6F666673657440A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7883A72E6F666673657448A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7983A72E6F666673657450A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7A83A72E6F666673657458A52E73697A6502AB2E76616C75655F6B696E64B068696464656E5F677269645F64696D73B92E67726F75705F7365676D656E745F66697865645F73697A6500B62E6B65726E6172675F7365676D656E745F616C69676E08B52E6B65726E6172675F7365676D656E745F73697A65CD0118A92E6C616E6775616765A84F70656E434C2043B12E6C616E67756167655F76657273696F6E920200B82E6D61785F666C61745F776F726B67726F75705F73697A65CD0400A52E6E616D65B25F5A3973696D706C65416464506A504B6A6DBB2E707269766174655F7365676D656E745F66697865645F73697A6500AB2E736770725F636F756E740CB12E736770725F7370696C6C5F636F756E7400A72E73796D626F6CB55F5A3973696D706C65416464506A504B6A6D2E6B64B82E756E69666F726D5F776F726B5F67726F75705F73697A6501B32E757365735F64796E616D69635F737461636BC2AB2E766770725F636F756E7404B12E766770725F7370696C6C5F636F756E7400AF2E7761766566726F6E745F73697A6540AD616D646873612E746172676574B9616D6467636E2D616D642D616D646873612D2D676678393038AE616D646873612E76657273696F6E92010200000000000000000000000000000000000000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E03900000000000001000000000000000100000001000000010000001A000000000008400000D20001000000360A4A7A5238A4D3F113F4DD04000000040000000200000001000000000000000300000000000000000000000000000000000000005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F623730363264386333326134613933330000000000000000000000000000000000000000000000000000000000000000000000180100000000000080100000000000000000000000000000000000000000000000000000000000004000AF008C000000090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C20102C02400000002000AC0000000008002027E7FC08CBF07FF0486FFFF0000060406920600006800008FD2820002000302067E0200043203030638008050DC02007F020102067E0000003203030238008050DC00007F03700F8CBF03050468008070DC00027F00000081BF00000000060000000000000070070000000000000B000000000000001800000000000000050000000000000020080000000000000A000000000000004600000000000000F5FEFF6F00000000D0070000000000000400000000000000F807000000000000000000000000000000000000000000004C696E6B65723A20414D44204C4C442031392E302E3000414D4420636C616E672076657273696F6E2031392E302E306769742028202032343231322063393630313665636534313337356462646438663037356266333762643666633333323230376233290000414D4420636C616E672076657273696F6E2031382E302E3067697420287373683A2F2F6765727269746769742F6C696768746E696E672F65632F6C6C766D2D70726F6A65637420616D642D6D61696E6C696E652D6F70656E203234313736206639353030396131663930323132323438653130363339646538376536356361636163386439613729
00000000000000000000000000000000000000000000000000460000000002080070290000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E0390000000000000100000000000000002E6E6F7465002E64796E73796D002E676E752E68617368002E68617368002E64796E737472002E726F64617461002E74657874002E64796E616D6963002E72656C726F5F70616464696E67002E627373002E636F6D6D656E74002E73796D746162002E7368737472746162002E73747274616200005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F62373036326438633332613461393333005F44594E414D494300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000070000000200000000000000380200000000000038020000000000003405000000000000000000000000000004000000000000000000000000000000070000000B00000002000000000000007007000000000000700700000000000060000000000000000500000001000000080000000000000018000000000000000F000000F6FFFF6F0200000000000000D007000000000000D007000000000000280000000000000002000000000000000800000000000000000000000000000019000000050000000200000000000000F807000000000000F80700000000000028000000000000000200000000000000040000000000000004000000000000001F000000030000000200000000000000200800000000000020080000000000004600000000000000000000000000000001000000000000000000000000000000270000000100000002000000000000008008000000000000800800000000000040000000000000000000000000000000400000000000000000000000000000002F000000010000000600000000000000001900000000000000090000000000006C00000000000000000000000000000000010000000000000000000000000000350000000600000003000000000000007029000000000000700900000000000070000000000000000500000000000000080000000000000010000000000000003E000000080000000300000000000000E029000000000000E00900000000000020060000000000000000000000000000010000000000000000000000000000004D000000080000000300000000000000E039000000000000E0090000000000000100000000000000000000000000000001000000000000000000000000000000520000000100000030000000000000000000000000000000E009000000000000F0000000000000000000000000000000010000000000000001000000000000005B0000000200000000000000000000000000000000000000D00A00000000000078000000000000000E0000000200000008000000000000001800000000000000630000000300000000000000000000000000000000000000480B00000000000075000000000000000000000000000000010000000000000000000000000000006D0000000300000000000000000000000000000000000000BD0B0000000000004F00000000000000000000000000000001000000000000000000000000000000 + - Name: .hipFatBinSegment + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x202FD0 + AddressAlign: 0x8 + Content: '465049480100000000102000000000000000000000000000' +... 
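The test above pins down the user-visible output format: one line per bundle entry, of the form `<offload kind>-<triple> file://<path>#offset=<N>&size=<M>`, where the offset and size locate the embedded device image inside the host binary's fatbin section. As a rough illustration of how the new API surface is meant to be driven (a sketch only, built from the two calls visible in the `ObjDumper.cpp` change that follows, `extractOffloadBundleFatBinary` and `printEntriesAsURI`; the wrapper function name here is invented for the example):

```c++
// Sketch: reproduce the `llvm-readobj --offloading` listing with the
// OffloadBundle API used by this patch. Header paths follow the includes
// added in the ObjDumper.cpp hunk below.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/OffloadBundle.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper, not part of the patch: prints each fatbin bundle
// entry as a `file://...#offset=N&size=M` URI, matching the CHECK lines
// in the test above.
static void printOffloadBundles(const object::ObjectFile &Obj) {
  SmallVector<object::OffloadBundleFatBin> Bundles;
  if (Error Err = object::extractOffloadBundleFatBinary(Obj, Bundles)) {
    // llvm-readobj downgrades extraction failures to a warning; do the same.
    logAllUnhandledErrors(std::move(Err), errs(), Obj.getFileName() + ": ");
    return;
  }
  for (const object::OffloadBundleFatBin &Bundle : Bundles)
    Bundle.printEntriesAsURI();
}
```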
diff --git a/llvm/tools/llvm-readobj/ObjDumper.cpp b/llvm/tools/llvm-readobj/ObjDumper.cpp index bd670ae..0b59dd4 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.cpp +++ b/llvm/tools/llvm-readobj/ObjDumper.cpp @@ -16,6 +16,8 @@ #include "llvm/Object/Archive.h" #include "llvm/Object/Decompressor.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/OffloadBinary.h" +#include "llvm/Object/OffloadBundle.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ScopedPrinter.h" @@ -230,4 +232,14 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj, } } +void ObjDumper::printOffloading(const object::ObjectFile &Obj) { + SmallVector<llvm::object::OffloadBundleFatBin> Bundles; + if (Error Err = object::extractOffloadBundleFatBinary(Obj, Bundles)) + reportWarning(std::move(Err), Obj.getFileName()); + + // Print out all the FatBin Bundles that are contained in this buffer. + for (const auto &[Index, Bundle] : llvm::enumerate(Bundles)) + Bundle.printEntriesAsURI(); +} + } // namespace llvm diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h index a654078..d264394 100644 --- a/llvm/tools/llvm-readobj/ObjDumper.h +++ b/llvm/tools/llvm-readobj/ObjDumper.h @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/OffloadBinary.h" #include "llvm/Support/CommandLine.h" #include <unordered_set> @@ -188,6 +189,7 @@ public: std::function<Error(const Twine &Msg)> WarningHandler; void reportUniqueWarning(Error Err) const; void reportUniqueWarning(const Twine &Msg) const; + void printOffloading(const object::ObjectFile &Obj); protected: ScopedPrinter &W; diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td index 711522c..97d5d7f 100644 --- a/llvm/tools/llvm-readobj/Opts.td +++ b/llvm/tools/llvm-readobj/Opts.td @@ -32,6 +32,7 @@ def file_header : FF<"file-header", "Display file header">; def headers : FF<"headers", "Equivalent to setting: --file-header, --program-headers, --section-headers">; defm hex_dump : Eq<"hex-dump", "Display the specified section(s) as hexadecimal bytes">, MetaVarName<"<name or index>">; def pretty_print : FF<"pretty-print", "Pretty print JSON output">; +def offloading : FF<"offloading", "Display the content of the offloading section">; def relocs : FF<"relocs", "Display the relocation entries in the file">; def section_data : FF<"section-data", "Display section data for each section shown. 
This option has no effect for GNU style output">; def section_details : FF<"section-details", "Display the section details">; diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index 2b34761..5327731 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -135,6 +135,7 @@ static bool HashHistogram; static bool Memtag; static bool NeededLibraries; static bool Notes; +static bool Offloading; static bool ProgramHeaders; static bool SectionGroups; static std::vector<std::string> SFrame; @@ -274,6 +275,7 @@ static void parseOptions(const opt::InputArgList &Args) { opts::Memtag = Args.hasArg(OPT_memtag); opts::NeededLibraries = Args.hasArg(OPT_needed_libs); opts::Notes = Args.hasArg(OPT_notes); + opts::Offloading = Args.hasArg(OPT_offloading); opts::PrettyPrint = Args.hasArg(OPT_pretty_print); opts::ProgramHeaders = Args.hasArg(OPT_program_headers); opts::SectionGroups = Args.hasArg(OPT_section_groups); @@ -459,6 +461,8 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer, Dumper->printGnuHashTable(); if (opts::VersionInfo) Dumper->printVersionInfo(); + if (opts::Offloading) + Dumper->printOffloading(Obj); if (opts::StringTable) Dumper->printStringTable(); if (Obj.isELF()) { @@ -707,6 +711,7 @@ int llvm_readobj_main(int argc, char **argv, const llvm::ToolContext &) { opts::DynamicTable = true; opts::Notes = true; opts::VersionInfo = true; + opts::Offloading = true; opts::UnwindInfo = true; opts::SectionGroups = true; opts::HashHistogram = true; diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt index ab709e3..0f8fcb9 100644 --- a/llvm/unittests/CAS/CMakeLists.txt +++ b/llvm/unittests/CAS/CMakeLists.txt @@ -1,7 +1,3 @@ -if (LLVM_ENABLE_ONDISK_CAS) - add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1) -endif() - set(LLVM_LINK_COMPONENTS Support CAS @@ -12,6 +8,7 @@ add_llvm_unittest(CASTests ActionCacheTest.cpp CASTestConfig.cpp ObjectStoreTest.cpp + OnDiskTrieRawHashMapTest.cpp ProgramTest.cpp ) diff --git a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp new file mode 100644 index 0000000..7bedfe4 --- /dev/null +++ b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp @@ -0,0 +1,220 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CAS/OnDiskTrieRawHashMap.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Testing/Support/Error.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +#if LLVM_ENABLE_ONDISK_CAS +using namespace llvm; +using namespace llvm::cas; + +namespace { + +struct OnDiskTrieRawHashMapTestFixture + : public ::testing::TestWithParam<size_t> { + static constexpr size_t MB = 1024u * 1024u; + static constexpr size_t DataSize = 8; // Multiple of 8B. 
+ + std::optional<unittest::TempDir> Temp; + size_t NumHashBytes; + + void SetUp() override { + Temp.emplace("trie-raw-hash-map", /*Unique=*/true); + NumHashBytes = GetParam(); + } + void TearDown() override { Temp.reset(); } + + Expected<OnDiskTrieRawHashMap> createTrie() { + size_t NumHashBits = NumHashBytes * 8; + return OnDiskTrieRawHashMap::create( + Temp->path((Twine(NumHashBytes) + "B").str()), "index", + /*NumHashBits=*/NumHashBits, DataSize, /*MaxFileSize=*/MB, + /*NewInitialFileSize=*/std::nullopt); + } +}; + +// Create tries with various sizes of hash and with data. +TEST_P(OnDiskTrieRawHashMapTestFixture, General) { + std::optional<OnDiskTrieRawHashMap> Trie1; + ASSERT_THAT_ERROR(createTrie().moveInto(Trie1), Succeeded()); + std::optional<OnDiskTrieRawHashMap> Trie2; + ASSERT_THAT_ERROR(createTrie().moveInto(Trie2), Succeeded()); + + uint8_t Hash0Bytes[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + uint8_t Hash1Bytes[8] = {1, 0, 0, 0, 0, 0, 0, 0}; + auto Hash0 = ArrayRef(Hash0Bytes).take_front(NumHashBytes); + auto Hash1 = ArrayRef(Hash1Bytes).take_front(NumHashBytes); + constexpr StringLiteral Data0v1Bytes = "data0.v1"; + constexpr StringLiteral Data0v2Bytes = "data0.v2"; + constexpr StringLiteral Data1Bytes = "data1..."; + static_assert(Data0v1Bytes.size() == DataSize, "math error"); + static_assert(Data0v2Bytes.size() == DataSize, "math error"); + static_assert(Data1Bytes.size() == DataSize, "math error"); + ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size()); + ArrayRef<char> Data0v2 = ArrayRef(Data0v2Bytes.data(), Data0v2Bytes.size()); + ArrayRef<char> Data1 = ArrayRef(Data1Bytes.data(), Data1Bytes.size()); + + // Lookup when trie is empty. + EXPECT_FALSE(Trie1->find(Hash0)); + + // Insert. + std::optional<FileOffset> Offset; + std::optional<MutableArrayRef<char>> Data; + { + std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + ASSERT_THAT_ERROR(Trie1->insert({Hash0, Data0v1}).moveInto(Insertion), + Succeeded()); + EXPECT_EQ(Hash0, (*Insertion)->Hash); + EXPECT_EQ(Data0v1, (*Insertion)->Data); + EXPECT_TRUE(isAddrAligned(Align(8), (*Insertion)->Data.data())); + + Offset = Insertion->getOffset(); + Data = (*Insertion)->Data; + } + + // Find. + { + auto Lookup = Trie1->find(Hash0); + ASSERT_TRUE(Lookup); + EXPECT_EQ(Hash0, Lookup->Hash); + EXPECT_EQ(Data0v1, Lookup->Data); + EXPECT_EQ(Offset->get(), Lookup.getOffset().get()); + } + + // Find in a different instance of the same on-disk trie that existed + // before the insertion. + { + auto Lookup = Trie2->find(Hash0); + ASSERT_TRUE(Lookup); + EXPECT_EQ(Hash0, Lookup->Hash); + EXPECT_EQ(Data0v1, Lookup->Data); + EXPECT_EQ(Offset->get(), Lookup.getOffset().get()); + } + + // Create a new instance and check that too. + Trie2.reset(); + ASSERT_THAT_ERROR(createTrie().moveInto(Trie2), Succeeded()); + { + auto Lookup = Trie2->find(Hash0); + ASSERT_TRUE(Lookup); + EXPECT_EQ(Hash0, Lookup->Hash); + EXPECT_EQ(Data0v1, Lookup->Data); + EXPECT_EQ(Offset->get(), Lookup.getOffset().get()); + } + + // Change the data. + llvm::copy(Data0v2, Data->data()); + { + auto Lookup = Trie2->find(Hash0); + ASSERT_TRUE(Lookup); + EXPECT_EQ(Hash0, Lookup->Hash); + EXPECT_EQ(Data0v2, Lookup->Data); + EXPECT_EQ(Offset->get(), Lookup.getOffset().get()); + } + + // Find different hash. + EXPECT_FALSE(Trie1->find(Hash1)); + EXPECT_FALSE(Trie2->find(Hash1)); + + // Recover from an offset. 
+ { + OnDiskTrieRawHashMap::const_pointer Recovered; + ASSERT_THAT_ERROR(Trie1->recoverFromFileOffset(*Offset).moveInto(Recovered), + Succeeded()); + ASSERT_TRUE(Recovered); + EXPECT_EQ(Offset->get(), Recovered.getOffset().get()); + EXPECT_EQ(Hash0, Recovered->Hash); + EXPECT_EQ(Data0v2, Recovered->Data); + } + + // Recover from a bad offset. + { + FileOffset BadOffset(1); + OnDiskTrieRawHashMap::const_pointer Recovered; + ASSERT_THAT_ERROR( + Trie1->recoverFromFileOffset(BadOffset).moveInto(Recovered), Failed()); + } + + // Insert another thing. + { + std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + ASSERT_THAT_ERROR(Trie1->insert({Hash1, Data1}).moveInto(Insertion), + Succeeded()); + EXPECT_EQ(Hash1, (*Insertion)->Hash); + EXPECT_EQ(Data1, (*Insertion)->Data); + EXPECT_TRUE(isAddrAligned(Align(8), (*Insertion)->Data.data())); + + EXPECT_NE(Offset->get(), Insertion->getOffset().get()); + } + + // Validate. + { + auto RecordVerify = + [&](FileOffset Offset, + OnDiskTrieRawHashMap::ConstValueProxy Proxy) -> Error { + if (Proxy.Hash.size() != NumHashBytes) + return createStringError("wrong hash size"); + if (Proxy.Data.size() != DataSize) + return createStringError("wrong data size"); + + return Error::success(); + }; + ASSERT_THAT_ERROR(Trie1->validate(RecordVerify), Succeeded()); + ASSERT_THAT_ERROR(Trie2->validate(RecordVerify), Succeeded()); + } + + // Size and capacity. + { + EXPECT_EQ(Trie1->capacity(), MB); + EXPECT_EQ(Trie2->capacity(), MB); + EXPECT_LE(Trie1->size(), MB); + EXPECT_LE(Trie2->size(), MB); + } +} + +INSTANTIATE_TEST_SUITE_P(OnDiskTrieRawHashMapTest, + OnDiskTrieRawHashMapTestFixture, + ::testing::Values(1, 2, 4, 8)); + +TEST(OnDiskTrieRawHashMapTest, OutOfSpace) { + unittest::TempDir Temp("trie-raw-hash-map", /*Unique=*/true); + std::optional<OnDiskTrieRawHashMap> Trie; + + // Too small to create header. + ASSERT_THAT_ERROR(OnDiskTrieRawHashMap::create( + Temp.path("NoSpace1").str(), "index", + /*NumHashBits=*/8, /*DataSize=*/8, /*MaxFileSize=*/8, + /*NewInitialFileSize=*/std::nullopt) + .moveInto(Trie), + Failed()); + + // Just enough for root node but not enough for any insertion. 
+ ASSERT_THAT_ERROR(OnDiskTrieRawHashMap::create( + Temp.path("NoSpace2").str(), "index", + /*NumHashBits=*/8, /*DataSize=*/8, /*MaxFileSize=*/118, + /*NewInitialFileSize=*/std::nullopt, + /*NewTableNumRootBits=*/1, /*NewTableNumSubtrieBits=*/1) + .moveInto(Trie), + Succeeded()); + uint8_t Hash0Bytes[1] = {0}; + auto Hash0 = ArrayRef(Hash0Bytes); + constexpr StringLiteral Data0v1Bytes = "data0.v1"; + ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size()); + std::optional<OnDiskTrieRawHashMap::pointer> Insertion; + ASSERT_THAT_ERROR(Trie->insert({Hash0, Data0v1}).moveInto(Insertion), + Failed()); +} + +} // namespace + +#endif // LLVM_ENABLE_ONDISK_CAS diff --git a/llvm/unittests/Support/MustacheTest.cpp b/llvm/unittests/Support/MustacheTest.cpp index 02eaed4..0ebbc58 100644 --- a/llvm/unittests/Support/MustacheTest.cpp +++ b/llvm/unittests/Support/MustacheTest.cpp @@ -998,7 +998,7 @@ TEST(MustachePartials, StandaloneIndentation) { std::string Out; raw_string_ostream OS(Out); T.render(D, OS); - EXPECT_NE("\\\n |\n <\n ->\n |\n/\n", Out); + EXPECT_EQ("\\\n |\n <\n ->\n |\n/\n", Out); } TEST(MustacheLambdas, BasicInterpolation) { @@ -1328,3 +1328,139 @@ TEST(MustacheTripleMustache, WithPadding) { T.render(D, OS); EXPECT_EQ("|---|", Out); } + +TEST(MustacheDelimiters, PairBehavior) { + Value D = Object{{"text", "Hey!"}}; + auto T = Template("{{=<% %>=}}(<%text%>)"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("(Hey!)", Out); +} + +TEST(MustacheDelimiters, SpecialCharacters) { + Value D = Object{{"text", "It worked!"}}; + auto T = Template("({{=[ ]=}}[text])"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("(It worked!)", Out); +} + +TEST(MustacheDelimiters, Sections) { + Value D = Object{{"section", true}, {"data", "I got interpolated."}}; + auto T = + Template("[\n{{#section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= " + "| | =}}\n|#section|\n {{data}}\n |data|\n|/section|\n]\n"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got " + "interpolated.\n]\n", + Out); +} + +TEST(MustacheDelimiters, InvertedSections) { + Value D = Object{{"section", false}, {"data", "I got interpolated."}}; + auto T = + Template("[\n{{^section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= " + "| | =}}\n|^section|\n {{data}}\n |data|\n|/section|\n]\n"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got " + "interpolated.\n]\n", + Out); +} + +TEST(MustacheDelimiters, PartialInheritence) { + Value D = Object{{"value", "yes"}}; + auto T = Template("[ {{>include}} ]\n{{= | | =}}\n[ |>include| ]\n"); + T.registerPartial("include", ".{{value}}."); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("[ .yes. ]\n[ .yes. ]\n", Out); +} + +TEST(MustacheDelimiters, PostPartialBehavior) { + Value D = Object{{"value", "yes"}}; + auto T = Template("[ {{>include}} ]\n[ .{{value}}. .|value|. ]\n"); + T.registerPartial("include", ".{{value}}. {{= | | =}} .|value|."); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("[ .yes. .yes. ]\n[ .yes. .|value|. 
]\n", Out); +} + +TEST(MustacheDelimiters, SurroundingWhitespace) { + Value D = Object{}; + auto T = Template("| {{=@ @=}} |"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_EQ("| |", Out); +} + +TEST(MustacheDelimiters, OutlyingWhitespaceInline) { + Value D = Object{}; + auto T = Template(" | {{=@ @=}}\n"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_EQ(" | \n", Out); +} + +TEST(MustacheDelimiters, StandaloneTag) { + Value D = Object{}; + auto T = Template("Begin.\n{{=@ @=}}\nEnd.\n"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("Begin.\nEnd.\n", Out); +} + +TEST(MustacheDelimiters, IndentedStandaloneTag) { + Value D = Object{}; + auto T = Template("Begin.\n {{=@ @=}}\nEnd.\n"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("Begin.\nEnd.\n", Out); +} + +TEST(MustacheDelimiters, StandaloneLineEndings) { + Value D = Object{}; + auto T = Template("|\r\n{{= @ @ =}}\r\n|"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("|\r\n|", Out); +} + +TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) { + Value D = Object{}; + auto T = Template(" {{=@ @=}}\n="); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("=", Out); +} + +TEST(MustacheDelimiters, StandaloneWithoutNewline) { + Value D = Object{}; + auto T = Template("=\n {{=@ @=}}"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_NE("=\n", Out); +} + +TEST(MustacheDelimiters, PairwithPadding) { + Value D = Object{}; + auto T = Template("|{{= @ @ =}}|"); + std::string Out; + raw_string_ostream OS(Out); + T.render(D, OS); + EXPECT_EQ("||", Out); +} diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt index a2b9e56..08c8944 100644 --- a/llvm/utils/profcheck-xfail.txt +++ b/llvm/utils/profcheck-xfail.txt @@ -838,8 +838,6 @@ Transforms/InstCombine/2011-02-14-InfLoop.ll Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll -Transforms/InstCombine/add2.ll -Transforms/InstCombine/add.ll Transforms/InstCombine/add-mask.ll Transforms/InstCombine/add-shl-mul-umax.ll Transforms/InstCombine/add-shl-sdiv-to-srem.ll diff --git a/mlir/docs/Tutorials/Toy/Ch-6.md b/mlir/docs/Tutorials/Toy/Ch-6.md index 529de55..178c073 100644 --- a/mlir/docs/Tutorials/Toy/Ch-6.md +++ b/mlir/docs/Tutorials/Toy/Ch-6.md @@ -245,7 +245,7 @@ define void @main() ``` The full code listing for dumping LLVM IR can be found in -`examples/toy/Ch6/toy.cpp` in the `dumpLLVMIR()` function: +`examples/toy/Ch6/toyc.cpp` in the `dumpLLVMIR()` function: ```c++ diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp index 262e0e7..cc6314c 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -48,8 +48,8 @@ static bool isStaticStrideOrOffset(int64_t strideOrOffset) { } static FailureOr<LLVM::LLVMFuncOp> -getFreeFn(OpBuilder &b, const LLVMTypeConverter *typeConverter, ModuleOp module, - SymbolTableCollection *symbolTables) { +getFreeFn(OpBuilder &b, const LLVMTypeConverter *typeConverter, + Operation *module, SymbolTableCollection *symbolTables) { bool useGenericFn = typeConverter->getOptions().useGenericFunctions; if (useGenericFn) @@ -483,8 +483,8 @@ public: ConversionPatternRewriter &rewriter) const 
override { // Insert the `free` declaration if it is not already present. FailureOr<LLVM::LLVMFuncOp> freeFunc = - getFreeFn(rewriter, getTypeConverter(), op->getParentOfType<ModuleOp>(), - symbolTables); + getFreeFn(rewriter, getTypeConverter(), + op->getParentWithTrait<OpTrait::SymbolTable>(), symbolTables); if (failed(freeFunc)) return failure(); Value allocatedPtr; diff --git a/mlir/test/Dialect/GPU/memref-to-llvm.mlir b/mlir/test/Dialect/GPU/memref-to-llvm.mlir new file mode 100644 index 0000000..81a96bf --- /dev/null +++ b/mlir/test/Dialect/GPU/memref-to-llvm.mlir @@ -0,0 +1,33 @@ +// RUN: mlir-opt --convert-to-llvm %s | FileCheck %s + +// Checking that malloc and free are declared in the proper module. + +// CHECK: module attributes {gpu.container_module} { +// CHECK: llvm.func @free(!llvm.ptr) +// CHECK: llvm.func @malloc(i64) -> !llvm.ptr +// CHECK: gpu.module @kernels { +// CHECK: llvm.func @free(!llvm.ptr) +// CHECK: llvm.func @malloc(i64) -> !llvm.ptr +// CHECK: gpu.func @kernel_1 +// CHECK: llvm.call @malloc({{.*}}) : (i64) -> !llvm.ptr +// CHECK: llvm.call @free({{.*}}) : (!llvm.ptr) -> () +// CHECK: gpu.return +// CHECK: } +// CHECK: } +// CHECK: } +module attributes {gpu.container_module} { + + gpu.module @kernels { + gpu.func @kernel_1() kernel { + %memref_a = memref.alloc() : memref<8x16xf32> + memref.dealloc %memref_a : memref<8x16xf32> + gpu.return + } + } + + func.func @main() { + %memref_a = memref.alloc() : memref<8x16xf32> + memref.dealloc %memref_a : memref<8x16xf32> + return + } +} diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 39286d4..a1950cb 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -330,6 +330,54 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg, return Rc; } +/// Returns a buffer of the requested \p Size, to be used as the source for +/// `submitData`. +/// +/// For small buffers (`Size <= sizeof(void*)`), uses \p AsyncInfo's +/// getVoidPtrLocation(). +/// For larger buffers, creates a dynamic buffer which will be eventually +/// deleted by \p AsyncInfo's post-processing callback. +static char *getOrCreateSourceBufferForSubmitData(AsyncInfoTy &AsyncInfo, + int64_t Size) { + constexpr int64_t VoidPtrSize = sizeof(void *); + + if (Size <= VoidPtrSize) { + void *&BufferElement = AsyncInfo.getVoidPtrLocation(); + return reinterpret_cast<char *>(&BufferElement); + } + + // Create a dynamic buffer for larger data and schedule its deletion. + char *DataBuffer = new char[Size]; + AsyncInfo.addPostProcessingFunction([DataBuffer]() { + delete[] DataBuffer; + return OFFLOAD_SUCCESS; + }); + return DataBuffer; +} + +/// Calculates the target pointee base by applying the host +/// pointee begin/base delta to the target pointee begin. 
+/// +/// ``` +/// TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase) +/// ``` +static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin, + void *TgtPteeBegin) { + uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) - + reinterpret_cast<uint64_t>(HstPteeBase); + void *TgtPteeBase = reinterpret_cast<void *>( + reinterpret_cast<uint64_t>(TgtPteeBegin) - Delta); + + DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD + ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n", + DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta); + DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD + "\n", + DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin)); + + return TgtPteeBase; +} + /// Utility function to perform a pointer attachment operation. /// /// For something like: @@ -399,16 +447,8 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, constexpr int64_t VoidPtrSize = sizeof(void *); assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small"); - uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) - - reinterpret_cast<uint64_t>(HstPteeBase); - void *TgtPteeBase = reinterpret_cast<void *>( - reinterpret_cast<uint64_t>(TgtPteeBegin) - Delta); - DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD - ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n", - DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta); - DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD - "\n", - DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin)); + void *TgtPteeBase = + calculateTargetPointeeBase(HstPteeBase, HstPteeBegin, TgtPteeBegin); // Add shadow pointer tracking if (!PtrTPR.getEntry()->addShadowPointer( @@ -435,48 +475,32 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, return OFFLOAD_SUCCESS; }; - bool IsPtrAFortranDescriptor = HstPtrSize > VoidPtrSize; - if (!IsPtrAFortranDescriptor) { - // For "regular" pointers, we can use the VoidPtrLocation from AsyncInfo as - // the buffer space for the submission. - void *&BufferElement = AsyncInfo.getVoidPtrLocation(); - BufferElement = TgtPteeBase; - - // Submit the updated pointer value to device - return HandleSubmitResult(Device.submitData( - TgtPtrAddr, &BufferElement, VoidPtrSize, AsyncInfo, PtrTPR.getEntry())); + // Get a buffer to be used as the source for data submission. + char *SrcBuffer = getOrCreateSourceBufferForSubmitData(AsyncInfo, HstPtrSize); + + // The pointee's address should occupy the first VoidPtrSize bytes + // irrespective of HstPtrSize. + std::memcpy(SrcBuffer, &TgtPteeBase, VoidPtrSize); + + // For larger "pointers" (e.g., Fortran descriptors), copy remaining + // descriptor fields from the host descriptor into the buffer. + if (HstPtrSize > VoidPtrSize) { + uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize; + void *HstDescriptorFieldsAddr = + reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize; + std::memcpy(SrcBuffer + VoidPtrSize, HstDescriptorFieldsAddr, + HstDescriptorFieldsSize); + + DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD + ") (pointer + %" PRId64 " additional bytes from host descriptor " DPxMOD + ")\n", + HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize, + DPxPTR(HstDescriptorFieldsAddr)); } - // For larger "pointers" (like Fortran's descriptors), we create a dynamic - // buffer, which will be eventually destroyed by AsyncInfo's post-processing - // callback. 
- char *DataBuffer = new char[HstPtrSize]; - - // For such descriptors, to the first VoidPtrSize bytes, we store the - // pointee's device address. - std::memcpy(DataBuffer, &TgtPteeBase, sizeof(void *)); - - // And to the remaining bytes, we copy the remaining contents of the host - // descriptor after the initial VoidPtrSize bytes. - uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize; - void *HstDescriptorFieldsAddr = - reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize; - std::memcpy(DataBuffer + VoidPtrSize, HstDescriptorFieldsAddr, - HstDescriptorFieldsSize); - - DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD ") (pointer + %" PRId64 - " additional bytes from host descriptor " DPxMOD ")\n", - HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize, - DPxPTR(HstDescriptorFieldsAddr)); - - // Submit the entire buffer to device - int SubmitResult = Device.submitData(TgtPtrAddr, DataBuffer, HstPtrSize, + // Submit the populated source buffer to device. + int SubmitResult = Device.submitData(TgtPtrAddr, SrcBuffer, HstPtrSize, AsyncInfo, PtrTPR.getEntry()); - - AsyncInfo.addPostProcessingFunction([DataBuffer]() -> int { - delete[] DataBuffer; - return OFFLOAD_SUCCESS; - }); return HandleSubmitResult(SubmitResult); } @@ -525,10 +549,17 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // ATTACH map-types are supposed to be handled after all mapping for the // construct is done. Defer their processing. if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH) { - AttachInfo->AttachEntries.emplace_back( - /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin, - /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I], - /*PointeeName=*/HstPtrName); + const bool IsCorrespondingPointerInit = + (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE); + // We don't need to keep track of PRIVATE | ATTACH entries. They + // represent corresponding-pointer-initialization, and are handled + // similar to firstprivate (PRIVATE | TO) entries by + // PrivateArgumentManager. + if (!IsCorrespondingPointerInit) + AttachInfo->AttachEntries.emplace_back( + /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin, + /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I], + /*PointeeName=*/HstPtrName); DP("Deferring ATTACH map-type processing for argument %d\n", I); continue; @@ -1397,13 +1428,24 @@ class PrivateArgumentManagerTy { uint32_t Padding; /// Host pointer name map_var_info_t HstPtrName = nullptr; + /// For corresponding-pointer-initialization: host pointee base address. + void *HstPteeBase = nullptr; + /// For corresponding-pointer-initialization: host pointee begin address. + void *HstPteeBegin = nullptr; + /// Whether this argument needs corresponding-pointer-initialization. 
+ bool IsCorrespondingPointerInit = false; FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size, uint32_t Alignment, uint32_t Padding, - map_var_info_t HstPtrName = nullptr) + map_var_info_t HstPtrName = nullptr, + void *HstPteeBase = nullptr, + void *HstPteeBegin = nullptr, + bool IsCorrespondingPointerInit = false) : HstPtrBegin(reinterpret_cast<char *>(HstPtr)), HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment), - Size(Size), Padding(Padding), HstPtrName(HstPtrName) {} + Size(Size), Padding(Padding), HstPtrName(HstPtrName), + HstPteeBase(HstPteeBase), HstPteeBegin(HstPteeBegin), + IsCorrespondingPointerInit(IsCorrespondingPointerInit) {} }; /// A vector of target pointers for all private arguments @@ -1421,6 +1463,153 @@ class PrivateArgumentManagerTy { /// A pointer to a \p AsyncInfoTy object AsyncInfoTy &AsyncInfo; + /// \returns the value of the target pointee's base to be used for + /// corresponding-pointer-initialization. + void *getTargetPointeeBaseForCorrespondingPointerInitialization( + void *HstPteeBase, void *HstPteeBegin) { + // See if the pointee's begin address has corresponding storage on device. + void *TgtPteeBegin = [&]() -> void * { + if (!HstPteeBegin) { + DP("Corresponding-pointer-initialization: pointee begin address is " + "null\n"); + return nullptr; + } + + return Device.getMappingInfo() + .getTgtPtrBegin(HstPteeBegin, /*Size=*/0, /*UpdateRefCount=*/false, + /*UseHoldRefCount=*/false) + .TargetPointer; + }(); + + // If it does, we calculate target pointee base using it, and return it. + // Otherwise, we retain the host pointee's base as the target pointee base + // of the initialized pointer. It's the user's responsibility to ensure + // that if a lookup fails, the host pointee is accessible on the device. + return TgtPteeBegin ? calculateTargetPointeeBase(HstPteeBase, HstPteeBegin, + TgtPteeBegin) + : HstPteeBase; + } + + /// Initialize the source buffer for corresponding-pointer-initialization. + /// + /// It computes and stores the target pointee base address (or the host + /// pointee's base address, if lookup of target pointee fails) to the first + /// `sizeof(void*)` bytes of \p Buffer, and for larger pointers + /// (Fortran descriptors), the remaining fields of the host descriptor + /// \p HstPtr after those `sizeof(void*)` bytes. + /// + /// Corresponding-pointer-initialization represents the initialization of the + /// private version of a base-pointer/referring-pointer on a target construct. + /// + /// For example, for the following test: + /// ```cpp + /// int x[10]; + /// int *px = &x[0]; + /// ... + /// #pragma omp target data map(tofrom:px) + /// { + /// int **ppx = omp_get_mapped_ptr(&px, omp_get_default_device()); + /// #pragma omp target map(tofrom:px[1]) is_device_ptr(ppx) + /// { + /// foo(px, ppx); + /// } + /// } + /// ``` + /// The following shows a possible way to implement the mapping of `px`, + /// which is predetermined firstprivate and should get initialized + /// via corresponding-pointer-initialization: + /// + /// (A) Possible way to implement the above with PRIVATE | ATTACH: + /// ```llvm + /// ; maps for px: + /// ; &px[0], &px[1], sizeof(px[1]), TO | FROM // (1) + /// ; &px, &px[1], sizeof(px), ATTACH // (2) + /// ; &px, &px[1], sizeof(px), PRIVATE | ATTACH | PARAM // (3) + /// call... @__omp_outlined...(ptr %px, ptr %ppx) + /// define ... @__omp_outlined(ptr %px, ptr %ppx) {... + /// foo(%px, %ppx) + /// ...} + /// ``` + /// `(1)` maps the pointee `px[1]`.
+ /// `(2)` attaches it to the mapped version of `px`. It can be controlled by + /// the user based on the `attach(auto/always/never)` map-type modifier. + /// `(3)` privatizes and initializes the private pointer `px`, and passes it + /// into the kernel as the argument `%px`. Can be skipped if `px` is not + /// referenced in the target construct. + /// + /// While this method is not too beneficial compared to just doing the + /// initialization in the body of the kernel, like: + /// (B) Possible way to implement the above without PRIVATE | ATTACH: + /// ```llvm + /// ; maps for px: + /// ; &px[0], &px[1], sizeof(px[1]), TO | FROM | PARAM // (4) + /// ; &px, &px[1], sizeof(px), ATTACH // (5) + /// call... @__omp_outlined...(ptr %px0, ptr %ppx) + /// define ... __omp_outlined...(ptr %px0, ptr %ppx) { + /// %px = alloca ptr; + /// store ptr %px0, ptr %px + /// foo(%px, %ppx) + /// } + /// ``` + /// + /// (B) is not so convenient for Fortran descriptors, because in + /// addition to the lookup, the remaining fields of the descriptor have + /// to be passed into the kernel to initialize the private copy, which + /// makes (A) a cleaner option for them. e.g. + /// ```f90 + /// integer, pointer :: p(:) + /// !$omp target map(p(1)) + /// ``` + /// + /// (C) Possible mapping for the above Fortran test using PRIVATE | ATTACH: + /// ```llvm + /// ; maps for p: + /// ; &p(1), &p(1), sizeof(p(1)), TO | FROM + /// ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), ATTACH + /// ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), PRIVATE | ATTACH | PARAM + /// call... @__omp_outlined...(ptr %ref_ptr_of_p) + void initBufferForCorrespondingPointerInitialization(char *Buffer, + void *HstPtr, + int64_t HstPtrSize, + void *HstPteeBase, + void *HstPteeBegin) { + constexpr int64_t VoidPtrSize = sizeof(void *); + assert(HstPtrSize >= VoidPtrSize && + "corresponding-pointer-initialization: pointer size is too small"); + + void *TgtPteeBase = + getTargetPointeeBaseForCorrespondingPointerInitialization(HstPteeBase, + HstPteeBegin); + + // Store the target pointee base address to the first VoidPtrSize bytes + DP("Initializing corresponding-pointer-initialization source buffer " + "for " DPxMOD ", with pointee base " DPxMOD "\n", + DPxPTR(HstPtr), DPxPTR(TgtPteeBase)); + std::memcpy(Buffer, &TgtPteeBase, VoidPtrSize); + if (HstPtrSize <= VoidPtrSize) + return; + + // For Fortran descriptors, copy the remaining descriptor fields from host + uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize; + void *HstDescriptorFieldsAddr = static_cast<char *>(HstPtr) + VoidPtrSize; + DP("Copying %" PRId64 + " bytes of descriptor fields into corresponding-pointer-initialization " + "buffer at offset %" PRId64 ", from " DPxMOD "\n", + HstDescriptorFieldsSize, VoidPtrSize, DPxPTR(HstDescriptorFieldsAddr)); + std::memcpy(Buffer + VoidPtrSize, HstDescriptorFieldsAddr, + HstDescriptorFieldsSize); + } + + /// Helper function to create and initialize a buffer to be used as the source + /// for corresponding-pointer-initialization. + void *createAndInitSourceBufferForCorrespondingPointerInitialization( + void *HstPtr, int64_t HstPtrSize, void *HstPteeBase, void *HstPteeBegin) { + char *Buffer = getOrCreateSourceBufferForSubmitData(AsyncInfo, HstPtrSize); + initBufferForCorrespondingPointerInitialization(Buffer, HstPtr, HstPtrSize, + HstPteeBase, HstPteeBegin); + return Buffer; + } + // TODO: What would be the best value here? Should we make it configurable? 
// If the size is larger than this threshold, we will allocate and transfer it // immediately instead of packing it. @@ -1435,7 +1624,9 @@ public: int addArg(void *HstPtr, int64_t ArgSize, int64_t ArgOffset, bool IsFirstPrivate, void *&TgtPtr, int TgtArgsIndex, map_var_info_t HstPtrName = nullptr, - const bool AllocImmediately = false) { + const bool AllocImmediately = false, void *HstPteeBase = nullptr, + void *HstPteeBegin = nullptr, + bool IsCorrespondingPointerInit = false) { // If the argument is not first-private, or its size is greater than a // predefined threshold, we will allocate memory and issue the transfer // immediately. @@ -1458,9 +1649,19 @@ public: // If first-private, copy data from host if (IsFirstPrivate) { DP("Submitting firstprivate data to the device.\n"); - int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, AsyncInfo); + + // The source value used for corresponding-pointer-initialization + // is different vs regular firstprivates. + void *DataSource = + IsCorrespondingPointerInit + ? createAndInitSourceBufferForCorrespondingPointerInitialization( + HstPtr, ArgSize, HstPteeBase, HstPteeBegin) + : HstPtr; + int Ret = Device.submitData(TgtPtr, DataSource, ArgSize, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { - DP("Copying data to device failed, failed.\n"); + DP("Copying %s data to device failed.\n", + IsCorrespondingPointerInit ? "corresponding-pointer-initialization" + : "firstprivate"); return OFFLOAD_FAIL; } } @@ -1506,8 +1707,10 @@ public: } } - FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize, - StartAlignment, Padding, HstPtrName); + FirstPrivateArgInfo.emplace_back( + TgtArgsIndex, HstPtr, ArgSize, StartAlignment, Padding, HstPtrName, + HstPteeBase, HstPteeBegin, IsCorrespondingPointerInit); + FirstPrivateArgSize += Padding + ArgSize; } @@ -1526,7 +1729,13 @@ public: for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) { // First pad the pointer as we (have to) pad it on the device too. Itr = std::next(Itr, Info.Padding); - std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr); + + if (Info.IsCorrespondingPointerInit) + initBufferForCorrespondingPointerInitialization( + &*Itr, Info.HstPtrBegin, Info.Size, Info.HstPteeBase, + Info.HstPteeBegin); + else + std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr); Itr = std::next(Itr, Info.Size); } // Allocate target memory @@ -1682,8 +1891,40 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, TgtPtrBegin = HstPtrBase; TgtBaseOffset = 0; } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) { + // For cases like: + // ``` + // int *p = ...; + // #pragma omp target map(p[0:10]) + // ``` + // `p` is predetermined firstprivate on the target construct, and the + // method to determine the initial value of the private copy on the + // device is called "corresponding-pointer-initialization". + // + // Such firstprivate pointers that need + // corresponding-pointer-initialization are represented using the + // `PRIVATE | ATTACH` map-types, in contrast to regular firstprivate + // entries, which use `PRIVATE | TO`. 
The structure of these + // `PRIVATE | ATTACH` entries is the same as the non-private + // `ATTACH` entries used to represent pointer-attachments, i.e.: + // ``` + // &hst_ptr_base/begin, &hst_ptee_begin, sizeof(hst_ptr) + // ``` + const bool IsAttach = (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH); + void *HstPteeBase = nullptr; + void *HstPteeBegin = nullptr; + if (IsAttach) { + // For corresponding-pointer-initialization, Args[I] is HstPteeBegin, + // and ArgBases[I] is both HstPtrBase/HstPtrBegin. + HstPteeBase = *reinterpret_cast<void **>(HstPtrBase); + HstPteeBegin = Args[I]; + HstPtrBegin = ArgBases[I]; + } TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; - const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO); + // Corresponding-pointer-initialization is a special case of firstprivate, + // since it also involves initializing the private pointer. + const bool IsFirstPrivate = + (ArgTypes[I] & OMP_TGT_MAPTYPE_TO) || IsAttach; + // If there is a next argument and it depends on the current one, we need // to allocate the private memory immediately. If this is not the case, // then the argument can be marked for optimization and packed with the @@ -1692,9 +1933,11 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF)); Ret = PrivateArgumentManager.addArg( HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin, - TgtArgs.size(), HstPtrName, AllocImmediately); + /*TgtArgsIndex=*/TgtArgs.size(), HstPtrName, AllocImmediately, + HstPteeBase, HstPteeBegin, /*IsCorrespondingPointerInit=*/IsAttach); if (Ret != OFFLOAD_SUCCESS) { - REPORT("Failed to process %sprivate argument " DPxMOD "\n", + REPORT("Failed to process %s%sprivate argument " DPxMOD "\n", + IsAttach ? "corresponding-pointer-initialization " : "", (IsFirstPrivate ? 
"first-" : ""), DPxPTR(HstPtrBegin)); return OFFLOAD_FAIL; } diff --git a/offload/test/mapping/lambda_by_value.cpp b/offload/test/mapping/lambda_by_value.cpp index 5516dedd..4c0278d 100644 --- a/offload/test/mapping/lambda_by_value.cpp +++ b/offload/test/mapping/lambda_by_value.cpp @@ -1,4 +1,5 @@ -// RUN: %libomptarget-compilexx-run-and-check-generic +// RUN: %libomptarget-compileopt-generic -fno-exceptions +// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic #include <stdint.h> #include <stdio.h> diff --git a/offload/test/mapping/map_back_race.cpp b/offload/test/mapping/map_back_race.cpp index 8a988d3..49bbe87 100644 --- a/offload/test/mapping/map_back_race.cpp +++ b/offload/test/mapping/map_back_race.cpp @@ -2,6 +2,9 @@ // Taken from https://github.com/llvm/llvm-project/issues/54216 +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// UNSUPPORTED: gpu + #include <algorithm> #include <cstdlib> #include <iostream> diff --git a/offload/test/mapping/map_both_pointer_pointee.c b/offload/test/mapping/map_both_pointer_pointee.c index 7be1ba4..1934b70 100644 --- a/offload/test/mapping/map_both_pointer_pointee.c +++ b/offload/test/mapping/map_both_pointer_pointee.c @@ -1,11 +1,10 @@ -// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu -// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu -// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu -// RUN: %libomptarget-compile-run-and-check-x86_64-unknown-linux-gnu -// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: unified_shared_memory // UNSUPPORTED: amdgcn-amd-amdhsa +// +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// XFAIL: nvidiagpu #pragma omp declare target int *ptr1; diff --git a/offload/test/mapping/map_ptr_and_star_local.c b/offload/test/mapping/map_ptr_and_star_local.c index cc826b3..97fa7cd 100644 --- a/offload/test/mapping/map_ptr_and_star_local.c +++ b/offload/test/mapping/map_ptr_and_star_local.c @@ -1,6 +1,9 @@ -// RUN: %libomptarget-compilexx-run-and-check-generic +// RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: libc +// +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// XFAIL: gpu #include <omp.h> #include <stdio.h> diff --git a/offload/test/mapping/map_structptr_and_member_global.c b/offload/test/mapping/map_structptr_and_member_global.c index 960eea4..f855e87 100644 --- a/offload/test/mapping/map_structptr_and_member_global.c +++ b/offload/test/mapping/map_structptr_and_member_global.c @@ -1,6 +1,9 @@ -// RUN: %libomptarget-compilexx-run-and-check-generic +// RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: libc +// +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// XFAIL: gpu #include <omp.h> #include <stdio.h> diff --git a/offload/test/mapping/map_structptr_and_member_local.c b/offload/test/mapping/map_structptr_and_member_local.c index bd75940..bd9e2a8 100644 --- a/offload/test/mapping/map_structptr_and_member_local.c +++ b/offload/test/mapping/map_structptr_and_member_local.c @@ -1,6 +1,9 @@ -// RUN: %libomptarget-compilexx-run-and-check-generic +// RUN: %libomptarget-compile-run-and-check-generic // REQUIRES: libc +// +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// XFAIL: gpu #include <omp.h> #include <stdio.h> diff --git a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu index 1f84a0e..b2e1edf 100644 --- 
a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu +++ b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu @@ -5,10 +5,10 @@ // RUN: %t | %fcheck-generic // clang-format on -// UNSUPPORTED: aarch64-unknown-linux-gnu -// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -// UNSUPPORTED: x86_64-unknown-linux-gnu -// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO +// REQUIRES: gpu +// +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// XFAIL: gpu #include <stdio.h> diff --git a/offload/test/offloading/bug51781.c b/offload/test/offloading/bug51781.c index 2f30b03..ff7fa51 100644 --- a/offload/test/offloading/bug51781.c +++ b/offload/test/offloading/bug51781.c @@ -16,6 +16,7 @@ // the generic state machine. // // RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt \ +// RUN: -Xoffload-linker -mllvm=-openmp-opt-disable-spmdization \ // RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom @@ -24,7 +25,9 @@ // Repeat with reduction clause, which has managed to break the custom state // machine in the past. // -// RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt -DADD_REDUCTION \ +// RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt \ +// RUN: -DADD_REDUCTION \ +// RUN: -Xoffload-linker -mllvm=-openmp-opt-disable-spmdization \ // RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1 // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90 index b9c2d34..b44c0b2 100644 --- a/offload/test/offloading/fortran/declare-target-automap.f90 +++ b/offload/test/offloading/fortran/declare-target-automap.f90 @@ -1,6 +1,9 @@ !Offloading test for AUTOMAP modifier in declare target enter ! REQUIRES: flang, amdgpu +! FIXME: https://github.com/llvm/llvm-project/issues/161265 +! XFAIL: amdgpu + ! 
RUN: %libomptarget-compile-fortran-run-and-check-generic program automap_program use iso_c_binding, only: c_loc diff --git a/offload/test/offloading/interop.c b/offload/test/offloading/interop.c index 26287e3..d9fa2ef 100644 --- a/offload/test/offloading/interop.c +++ b/offload/test/offloading/interop.c @@ -1,5 +1,6 @@ // RUN: %libomptarget-compile-run-and-check-generic -// REQUIRES: nvptx64-nvidia-cuda + +// XFAIL: * #include <assert.h> #include <omp.h> diff --git a/offload/test/offloading/single_threaded_for_barrier_hang_1.c b/offload/test/offloading/single_threaded_for_barrier_hang_1.c index 8ee6b51..a007521 100644 --- a/offload/test/offloading/single_threaded_for_barrier_hang_1.c +++ b/offload/test/offloading/single_threaded_for_barrier_hang_1.c @@ -1,6 +1,9 @@ // RUN: %libomptarget-compile-run-and-check-generic // RUN: %libomptarget-compileopt-run-and-check-generic +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// UNSUPPORTED: gpu + #include <omp.h> #include <stdio.h> diff --git a/offload/test/offloading/single_threaded_for_barrier_hang_2.c b/offload/test/offloading/single_threaded_for_barrier_hang_2.c index a98abd6..cabd2ed 100644 --- a/offload/test/offloading/single_threaded_for_barrier_hang_2.c +++ b/offload/test/offloading/single_threaded_for_barrier_hang_2.c @@ -1,6 +1,7 @@ // RUN: %libomptarget-compile-run-and-check-generic -// FIXME: This fails with optimization enabled and prints b: 0 -// FIXME: RUN: %libomptarget-compileopt-run-and-check-generic + +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// UNSUPPORTED: gpu #include <omp.h> #include <stdio.h> diff --git a/offload/test/offloading/spmdization.c b/offload/test/offloading/spmdization.c index 7f3f47d..48627cd 100644 --- a/offload/test/offloading/spmdization.c +++ b/offload/test/offloading/spmdization.c @@ -2,7 +2,8 @@ // RUN: %libomptarget-compileopt-generic // RUN: env LIBOMPTARGET_INFO=16 \ // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SPMD -// RUN: %libomptarget-compileopt-generic -mllvm --openmp-opt-disable-spmdization +// RUN: %libomptarget-compileopt-generic -mllvm --openmp-opt-disable-spmdization \ +// RUN: -Xoffload-linker -mllvm=--openmp-opt-disable-spmdization // RUN: env LIBOMPTARGET_INFO=16 \ // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,GENERIC // clang-format on diff --git a/offload/test/sanitizer/ptr_outside_alloc_1.c b/offload/test/sanitizer/ptr_outside_alloc_1.c index bdd0283..b30ce12e 100644 --- a/offload/test/sanitizer/ptr_outside_alloc_1.c +++ b/offload/test/sanitizer/ptr_outside_alloc_1.c @@ -5,12 +5,10 @@ // RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE // clang-format on -// UNSUPPORTED: aarch64-unknown-linux-gnu -// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -// UNSUPPORTED: x86_64-unknown-linux-gnu -// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO -// UNSUPPORTED: s390x-ibm-linux-gnu -// UNSUPPORTED: s390x-ibm-linux-gnu-LTO +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// UNSUPPORTED: nvidiagpu +// +// REQUIRES: gpu #include <omp.h> diff --git a/offload/test/sanitizer/ptr_outside_alloc_2.c b/offload/test/sanitizer/ptr_outside_alloc_2.c index 6a67962..3bb8bda 100644 --- a/offload/test/sanitizer/ptr_outside_alloc_2.c +++ b/offload/test/sanitizer/ptr_outside_alloc_2.c @@ -3,12 +3,10 @@ // RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 
%libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK // clang-format on -// UNSUPPORTED: aarch64-unknown-linux-gnu -// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -// UNSUPPORTED: x86_64-unknown-linux-gnu -// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO -// UNSUPPORTED: s390x-ibm-linux-gnu -// UNSUPPORTED: s390x-ibm-linux-gnu-LTO +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// UNSUPPORTED: nvidiagpu +// +// REQUIRES: gpu #include <omp.h> diff --git a/offload/test/sanitizer/use_after_free_1.c b/offload/test/sanitizer/use_after_free_1.c index c4783c5c..acc1de3 100644 --- a/offload/test/sanitizer/use_after_free_1.c +++ b/offload/test/sanitizer/use_after_free_1.c @@ -5,12 +5,10 @@ // RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE // clang-format on -// UNSUPPORTED: aarch64-unknown-linux-gnu -// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -// UNSUPPORTED: x86_64-unknown-linux-gnu -// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO -// UNSUPPORTED: s390x-ibm-linux-gnu -// UNSUPPORTED: s390x-ibm-linux-gnu-LTO +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// UNSUPPORTED: nvidiagpu +// +// REQUIRES: gpu #include <omp.h> diff --git a/offload/test/sanitizer/use_after_free_2.c b/offload/test/sanitizer/use_after_free_2.c index 1c1e097..3d70fb7 100644 --- a/offload/test/sanitizer/use_after_free_2.c +++ b/offload/test/sanitizer/use_after_free_2.c @@ -3,12 +3,10 @@ // RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK // clang-format on -// UNSUPPORTED: aarch64-unknown-linux-gnu -// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -// UNSUPPORTED: x86_64-unknown-linux-gnu -// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO -// UNSUPPORTED: s390x-ibm-linux-gnu -// UNSUPPORTED: s390x-ibm-linux-gnu-LTO +// FIXME: https://github.com/llvm/llvm-project/issues/161265 +// UNSUPPORTED: nvidiagpu +// +// REQUIRES: gpu // If offload memory pooling is enabled for a large allocation, reuse error is // not detected. UNSUPPORTED: large_allocation_memory_pool #include <omp.h> diff --git a/offload/tools/deviceinfo/llvm-offload-device-info.cpp b/offload/tools/deviceinfo/llvm-offload-device-info.cpp index 67a6e07..9b58d67 100644 --- a/offload/tools/deviceinfo/llvm-offload-device-info.cpp +++ b/offload/tools/deviceinfo/llvm-offload-device-info.cpp @@ -137,7 +137,7 @@ ol_result_t printDeviceValue(std::ostream &S, ol_device_handle_t Dev, size_t Size; OFFLOAD_ERR(olGetDeviceInfoSize(Dev, Info, &Size)); Val.resize(Size); - OFFLOAD_ERR(olGetDeviceInfo(Dev, Info, sizeof(Val), Val.data())); + OFFLOAD_ERR(olGetDeviceInfo(Dev, Info, Size, Val.data())); doWrite<T, PK>(S, reinterpret_cast<T>(Val.data())); } else { T Val;
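
Note on the corresponding-pointer-initialization change in processDataBefore above: the new `PRIVATE | ATTACH` handling covers privatized pointers whose pointee is mapped. Below is a minimal sketch of the kind of program that exercises this path; it is a hypothetical example, not a test from this patch, and it assumes the OpenMP rule that a firstprivate pointer whose pointee is mapped is initialized to the device address of that pointee.

```cpp
// Hypothetical illustration of corresponding-pointer-initialization.
// 'data' is mapped and 'p' is firstprivate; the runtime must initialize the
// device-private copy of 'p' to point at the device copy of 'data'. The
// ATTACH entry carries &p, &data[0], and sizeof(p), as described in the
// comment in the hunk above.
#include <cstdio>

int main() {
  int data[4] = {1, 2, 3, 4};
  int *p = data;
  int out = 0;
#pragma omp target map(to: data[0:4]) firstprivate(p) map(from: out)
  { out = p[2]; }
  // Expected: out = 3, once the private 'p' is attached to the device
  // copy of 'data' instead of carrying a stale host address.
  std::printf("out = %d\n", out);
  return 0;
}
```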
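The paired RUN-line updates in bug51781.c and spmdization.c follow one pattern: with `-foffload-lto`, the optimization pipeline (including OpenMPOpt) evidently runs again during the device link step, so passing `-mllvm -openmp-opt-disable-spmdization` at compile time alone no longer disables SPMDization end to end. The added `-Xoffload-linker -mllvm=...` forwards the same option to that link step. The resulting invocation shape, taken directly from the updated RUN lines:

```
%libomptarget-compileopt-generic -mllvm --openmp-opt-disable-spmdization \
  -Xoffload-linker -mllvm=--openmp-opt-disable-spmdization
```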
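Finally, the llvm-offload-device-info.cpp change fixes a classic container-size bug: `sizeof(Val)` is the size of the container object itself, a small compile-time constant, not the number of bytes in the buffer it owns, so the queried `Size` must be passed instead. A standalone sketch of the pattern, assuming `Val` is a resizable buffer such as `std::string` and using a hypothetical stand-in callee rather than the real `olGetDeviceInfo`:

```cpp
#include <cstddef>
#include <cstring>
#include <string>

// Stand-in for an API that writes N bytes into Buf (hypothetical helper).
void fillBuffer(void *Buf, std::size_t N) { std::memset(Buf, 'x', N); }

void query(std::size_t Size) {
  std::string Val;
  Val.resize(Size);
  // Bug: sizeof(Val) is sizeof(std::string), e.g. 32 bytes, regardless of
  // how large the owned buffer actually is, so the callee is told to write
  // the wrong number of bytes.
  // fillBuffer(Val.data(), sizeof(Val));

  // Fix: pass the number of bytes actually allocated.
  fillBuffer(Val.data(), Size);
}
```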