-rw-r--r--clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp20
-rw-r--r--clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp2
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp2
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp6
-rw-r--r--clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h4
-rw-r--r--clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp2
-rw-r--r--clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h2
-rw-r--r--clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp2
-rw-r--r--clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp2
-rw-r--r--clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h2
-rw-r--r--clang-tools-extra/test/clang-reorder-fields/FlexibleArrayMember.c10
-rw-r--r--clang/docs/ReleaseNotes.rst2
-rw-r--r--clang/include/clang/Basic/DiagnosticParseKinds.td2
-rw-r--r--clang/include/clang/CIR/Dialect/IR/CIROps.td39
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenClass.cpp42
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp7
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenStmt.cpp1
-rw-r--r--clang/lib/CIR/Dialect/IR/CIRDialect.cpp128
-rw-r--r--clang/lib/CodeGen/BackendUtil.cpp5
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp27
-rw-r--r--clang/lib/Parse/ParseExprCXX.cpp2
-rw-r--r--clang/lib/Sema/SemaExpr.cpp5
-rw-r--r--clang/test/CIR/CodeGen/complex.cpp37
-rw-r--r--clang/test/CIR/CodeGen/vbase.cpp82
-rw-r--r--clang/test/CIR/CodeGen/vector-ext.cpp20
-rw-r--r--clang/test/CIR/CodeGen/vector.cpp20
-rw-r--r--clang/test/CIR/IR/global-init.cir48
-rw-r--r--clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp423
-rw-r--r--clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp103
-rw-r--r--clang/test/CodeGenCXX/gh56652.cpp41
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl21
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl28
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn.cl3
-rw-r--r--clang/test/Parser/cxx2b-lambdas-ext-warns.cpp32
-rw-r--r--clang/test/SemaCXX/decltype.cpp78
-rw-r--r--clang/tools/clang-scan-deps/ClangScanDeps.cpp2
-rw-r--r--compiler-rt/test/builtins/Unit/fixunstfdi_test.c19
-rw-r--r--compiler-rt/test/builtins/Unit/multc3_test.c26
-rw-r--r--libc/src/string/memory_utils/op_generic.h15
-rw-r--r--libc/test/UnitTest/FEnvSafeTest.cpp2
-rw-r--r--lld/docs/ReleaseNotes.rst3
-rw-r--r--lld/test/wasm/archive-export.test3
-rw-r--r--lld/test/wasm/comdats.ll3
-rw-r--r--lld/test/wasm/visibility-hidden.ll3
-rw-r--r--lld/wasm/Driver.cpp15
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp37
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/incomplete-member-beyond-parent-bounds.yaml104
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/member-beyond-parent-bounds.yaml109
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/member-on-parent-bounds.yaml109
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml4
-rw-r--r--lldb/test/Shell/SymbolFile/DWARF/zero-sized-member-in-parent-bounds.yaml105
-rw-r--r--llvm/docs/CommandGuide/llvm-readelf.rst4
-rw-r--r--llvm/docs/CommandGuide/llvm-readobj.rst4
-rw-r--r--llvm/include/llvm/CAS/FileOffset.h39
-rw-r--r--llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h236
-rw-r--r--llvm/include/llvm/IR/ProfDataUtils.h8
-rw-r--r--llvm/include/llvm/Object/OffloadBundle.h2
-rw-r--r--llvm/include/llvm/Transforms/InstCombine/InstCombiner.h20
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h6
-rw-r--r--llvm/lib/CAS/CMakeLists.txt5
-rw-r--r--llvm/lib/CAS/DatabaseFile.cpp123
-rw-r--r--llvm/lib/CAS/DatabaseFile.h153
-rw-r--r--llvm/lib/CAS/OnDiskTrieRawHashMap.cpp1178
-rw-r--r--llvm/lib/IR/ProfDataUtils.cpp7
-rw-r--r--llvm/lib/Object/OffloadBundle.cpp18
-rw-r--r--llvm/lib/Support/Mustache.cpp18
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.h2
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp4
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h18
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp6
-rw-r--r--llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp30
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp8
-rw-r--r--llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll5
-rw-r--r--llvm/test/Transforms/InstCombine/preserve-profile.ll50
-rw-r--r--llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll677
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll2
-rw-r--r--llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml6
-rw-r--r--llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading-fail.test26
-rw-r--r--llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading.test27
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.cpp12
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.h2
-rw-r--r--llvm/tools/llvm-readobj/Opts.td1
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.cpp5
-rw-r--r--llvm/unittests/CAS/CMakeLists.txt5
-rw-r--r--llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp220
-rw-r--r--llvm/unittests/Support/MustacheTest.cpp138
-rw-r--r--llvm/utils/profcheck-xfail.txt2
-rw-r--r--mlir/docs/Tutorials/Toy/Ch-6.md2
-rw-r--r--mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp8
-rw-r--r--mlir/test/Dialect/GPU/memref-to-llvm.mlir33
-rw-r--r--offload/libomptarget/omptarget.cpp371
-rw-r--r--offload/test/mapping/lambda_by_value.cpp3
-rw-r--r--offload/test/mapping/map_back_race.cpp3
-rw-r--r--offload/test/mapping/map_both_pointer_pointee.c9
-rw-r--r--offload/test/mapping/map_ptr_and_star_local.c5
-rw-r--r--offload/test/mapping/map_structptr_and_member_global.c5
-rw-r--r--offload/test/mapping/map_structptr_and_member_local.c5
-rw-r--r--offload/test/offloading/CUDA/basic_launch_multi_arg.cu8
-rw-r--r--offload/test/offloading/bug51781.c5
-rw-r--r--offload/test/offloading/fortran/declare-target-automap.f903
-rw-r--r--offload/test/offloading/interop.c3
-rw-r--r--offload/test/offloading/single_threaded_for_barrier_hang_1.c3
-rw-r--r--offload/test/offloading/single_threaded_for_barrier_hang_2.c5
-rw-r--r--offload/test/offloading/spmdization.c3
-rw-r--r--offload/test/sanitizer/ptr_outside_alloc_1.c10
-rw-r--r--offload/test/sanitizer/ptr_outside_alloc_2.c10
-rw-r--r--offload/test/sanitizer/use_after_free_1.c10
-rw-r--r--offload/test/sanitizer/use_after_free_2.c10
-rw-r--r--offload/tools/deviceinfo/llvm-offload-device-info.cpp2
113 files changed, 4581 insertions, 825 deletions
diff --git a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
index affa276..5770bf7 100644
--- a/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
+++ b/clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.cpp
@@ -164,6 +164,22 @@ getNewFieldsOrder(const RecordDecl *Definition,
return NewFieldsOrder;
}
+static bool isOrderValid(const RecordDecl *RD, ArrayRef<unsigned> FieldOrder) {
+ if (FieldOrder.empty())
+ return false;
+
+ // If there is a flexible array member in the struct, it must remain the last
+ // field.
+ if (RD->hasFlexibleArrayMember() &&
+ FieldOrder.back() != FieldOrder.size() - 1) {
+ llvm::errs()
+ << "Flexible array member must remain the last field in the struct\n";
+ return false;
+ }
+
+ return true;
+}
+
struct ReorderedStruct {
public:
ReorderedStruct(const RecordDecl *Decl, ArrayRef<unsigned> NewFieldsOrder)
@@ -662,7 +678,7 @@ public:
return;
SmallVector<unsigned, 4> NewFieldsOrder =
getNewFieldsOrder(RD, DesiredFieldsOrder);
- if (NewFieldsOrder.empty())
+ if (!isOrderValid(RD, NewFieldsOrder))
return;
ReorderedStruct RS{RD, NewFieldsOrder};
@@ -699,7 +715,7 @@ public:
std::unique_ptr<ASTConsumer> ReorderFieldsAction::newASTConsumer() {
return std::make_unique<ReorderingConsumer>(RecordName, DesiredFieldsOrder,
- Replacements);
+ Replacements);
}
} // namespace reorder_fields
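The new guard exists because C requires a flexible array member to be the last member of its struct; moving it earlier would produce ill-formed code. A minimal sketch of what the check accepts and rejects, mirroring the new FlexibleArrayMember.c test (struct and field names here are illustrative):

    struct Packet {
      int kind;
      int len;
      int payload[]; // flexible array member: must remain last
    };

    // clang-reorder-fields -record-name Packet -fields-order payload,kind,len
    //   -> "Flexible array member must remain the last field in the struct"
    // clang-reorder-fields -record-name Packet -fields-order len,kind,payload
    //   -> kind/len are swapped; payload stays last and the rewrite succeeds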
diff --git a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp
index 78e58bc..36ac9a4 100644
--- a/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp
+++ b/clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.cpp
@@ -19,7 +19,7 @@ ComparisonInTempFailureRetryCheck::ComparisonInTempFailureRetryCheck(
StringRef Name, ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
RawRetryList(Options.get("RetryMacros", "TEMP_FAILURE_RETRY")) {
- StringRef(RawRetryList).split(RetryMacros, ",", -1, false);
+ RawRetryList.split(RetryMacros, ",", -1, false);
}
void ComparisonInTempFailureRetryCheck::storeOptions(
diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
index 227641d..1700502 100644
--- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
@@ -92,7 +92,7 @@ AssertSideEffectCheck::AssertSideEffectCheck(StringRef Name,
RawAssertList(Options.get("AssertMacros", "assert,NSAssert,NSCAssert")),
IgnoredFunctions(utils::options::parseListPair(
"__builtin_expect;", Options.get("IgnoredFunctions", ""))) {
- StringRef(RawAssertList).split(AssertMacros, ",", -1, false);
+ RawAssertList.split(AssertMacros, ",", -1, false);
}
// The options are explained in AssertSideEffectCheck.h.
diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp
index 3d839b5..837a86f 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp
@@ -39,12 +39,12 @@ ExceptionEscapeCheck::ExceptionEscapeCheck(StringRef Name,
RawIgnoredExceptions(Options.get("IgnoredExceptions", "")) {
llvm::SmallVector<StringRef, 8> FunctionsThatShouldNotThrowVec,
IgnoredExceptionsVec;
- StringRef(RawFunctionsThatShouldNotThrow)
- .split(FunctionsThatShouldNotThrowVec, ",", -1, false);
+ RawFunctionsThatShouldNotThrow.split(FunctionsThatShouldNotThrowVec, ",", -1,
+ false);
FunctionsThatShouldNotThrow.insert_range(FunctionsThatShouldNotThrowVec);
llvm::StringSet<> IgnoredExceptions;
- StringRef(RawIgnoredExceptions).split(IgnoredExceptionsVec, ",", -1, false);
+ RawIgnoredExceptions.split(IgnoredExceptionsVec, ",", -1, false);
IgnoredExceptions.insert_range(IgnoredExceptionsVec);
Tracer.ignoreExceptions(std::move(IgnoredExceptions));
Tracer.ignoreBadAlloc(true);
diff --git a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h
index ae6e202..bd1e7ba 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h
@@ -33,8 +33,8 @@ public:
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
private:
- std::string RawFunctionsThatShouldNotThrow;
- std::string RawIgnoredExceptions;
+ StringRef RawFunctionsThatShouldNotThrow;
+ StringRef RawIgnoredExceptions;
llvm::StringSet<> FunctionsThatShouldNotThrow;
utils::ExceptionAnalyzer Tracer;
diff --git a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp
index aa95fad..b8bca72 100644
--- a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp
@@ -20,7 +20,7 @@ ProperlySeededRandomGeneratorCheck::ProperlySeededRandomGeneratorCheck(
: ClangTidyCheck(Name, Context),
RawDisallowedSeedTypes(
Options.get("DisallowedSeedTypes", "time_t,std::time_t")) {
- StringRef(RawDisallowedSeedTypes).split(DisallowedSeedTypes, ',');
+ RawDisallowedSeedTypes.split(DisallowedSeedTypes, ',');
}
void ProperlySeededRandomGeneratorCheck::storeOptions(
diff --git a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h
index ea30127..7da01cc 100644
--- a/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h
+++ b/clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h
@@ -33,7 +33,7 @@ private:
void checkSeed(const ast_matchers::MatchFinder::MatchResult &Result,
const T *Func);
- std::string RawDisallowedSeedTypes;
+ StringRef RawDisallowedSeedTypes;
SmallVector<StringRef, 5> DisallowedSeedTypes;
};
diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp
index 4084d71..b921819 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp
@@ -494,7 +494,7 @@ UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
NullMacrosStr(Options.get("NullMacros", "NULL")),
IgnoredTypes(utils::options::parseStringList(Options.get(
"IgnoredTypes", "_CmpUnspecifiedParam;^std::__cmp_cat::__unspec"))) {
- StringRef(NullMacrosStr).split(NullMacros, ",");
+ NullMacrosStr.split(NullMacros, ",");
}
void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
diff --git a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp
index f9becee..3801fc0 100644
--- a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp
@@ -23,7 +23,7 @@ ExceptionEscapeCheck::ExceptionEscapeCheck(StringRef Name,
llvm::SmallVector<StringRef, 8> IgnoredExceptionsVec;
llvm::StringSet<> IgnoredExceptions;
- StringRef(RawIgnoredExceptions).split(IgnoredExceptionsVec, ",", -1, false);
+ RawIgnoredExceptions.split(IgnoredExceptionsVec, ",", -1, false);
llvm::transform(IgnoredExceptionsVec, IgnoredExceptionsVec.begin(),
[](StringRef S) { return S.trim(); });
IgnoredExceptions.insert_range(IgnoredExceptionsVec);
diff --git a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h
index 39da124..757a933 100644
--- a/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h
+++ b/clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h
@@ -30,7 +30,7 @@ public:
void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
private:
- std::string RawIgnoredExceptions;
+ StringRef RawIgnoredExceptions;
utils::ExceptionAnalyzer Tracer;
};
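The clang-tidy changes above all apply one refactoring: option strings that are only ever split are now stored as llvm::StringRef members rather than std::string, so split() is called on the member directly instead of through a StringRef temporary. A minimal sketch of the idiom (assuming, as with ClangTidyCheck options, that the underlying option storage outlives the check):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"

    static void splitMacroList(llvm::StringRef RawList,
                               llvm::SmallVectorImpl<llvm::StringRef> &Out) {
      // MaxSplit = -1 places no limit; KeepEmpty = false drops empty fields,
      // so "a,,b" yields {"a", "b"}.
      RawList.split(Out, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
      // Out holds views into RawList's storage; that storage must stay alive.
    }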
diff --git a/clang-tools-extra/test/clang-reorder-fields/FlexibleArrayMember.c b/clang-tools-extra/test/clang-reorder-fields/FlexibleArrayMember.c
new file mode 100644
index 0000000..ef64350
--- /dev/null
+++ b/clang-tools-extra/test/clang-reorder-fields/FlexibleArrayMember.c
@@ -0,0 +1,10 @@
+// RUN: clang-reorder-fields -record-name Foo -fields-order z,y,x %s -- 2>&1 | FileCheck --check-prefix=CHECK-BAD %s
+// RUN: clang-reorder-fields -record-name Foo -fields-order y,x,z %s -- | FileCheck --check-prefix=CHECK-GOOD %s
+
+// CHECK-BAD: {{^Flexible array member must remain the last field in the struct}}
+
+struct Foo {
+ int x; // CHECK-GOOD: {{^ int y;}}
+ int y; // CHECK-GOOD-NEXT: {{^ int x;}}
+ int z[]; // CHECK-GOOD-NEXT: {{^ int z\[\];}}
+};
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e8deae5..79dc0b2 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -433,6 +433,8 @@ Bug Fixes to C++ Support
object type. (#GH151531)
- Suppress ``-Wdouble-promotion`` when explicitly asked for with C++ list initialization (#GH33409).
- Fix the result of `__builtin_is_implicit_lifetime` for types with a user-provided constructor. (#GH160610)
+- Correctly deduce return types in ``decltype`` expressions. (#GH160497) (#GH56652) (#GH116319) (#GH161196)
+- Fixed a crash in the pre-C++23 warning for attributes before a lambda declarator (#GH161070).
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 4d9e123..c724136 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1141,7 +1141,7 @@ def warn_cxx23_compat_binding_pack : Warning<
def err_capture_default_first : Error<
"capture default must be first">;
def ext_decl_attrs_on_lambda : ExtWarn<
- "%select{an attribute specifier sequence|%0}1 in this position "
+ "%select{an attribute specifier sequence|%1}0 in this position "
"is a C++23 extension">, InGroup<CXX23AttrsOnLambda>;
def ext_lambda_missing_parens : ExtWarn<
"lambda without a parameter clause is a C++23 extension">,
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index bb39444..e1be08c 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -683,8 +683,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
//===----------------------------------------------------------------------===//
defvar CIR_YieldableScopes = [
- "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp",
- "SwitchOp", "TernaryOp", "WhileOp"
+ "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp",
+ "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp"
];
def CIR_YieldOp : CIR_Op<"yield", [
@@ -1776,7 +1776,9 @@ def CIR_GlobalLinkageKind : CIR_I32EnumAttr<
// is upstreamed.
def CIR_GlobalOp : CIR_Op<"global", [
- DeclareOpInterfaceMethods<CIRGlobalValueInterface>
+ DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+ DeclareOpInterfaceMethods<CIRGlobalValueInterface>,
+ NoRegionArguments
]> {
let summary = "Declare or define a global variable";
let description = [{
@@ -1807,6 +1809,9 @@ def CIR_GlobalOp : CIR_Op<"global", [
UnitAttr:$dso_local,
OptionalAttr<I64Attr>:$alignment);
+ let regions = (region MaxSizedRegion<1>:$ctorRegion,
+ MaxSizedRegion<1>:$dtorRegion);
+
let assemblyFormat = [{
($sym_visibility^)?
(`` $global_visibility^)?
@@ -1815,24 +1820,34 @@ def CIR_GlobalOp : CIR_Op<"global", [
(`comdat` $comdat^)?
(`dso_local` $dso_local^)?
$sym_name
- custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value)
+ custom<GlobalOpTypeAndInitialValue>($sym_type, $initial_value,
+ $ctorRegion, $dtorRegion)
attr-dict
}];
let extraClassDeclaration = [{
- bool isDeclaration() { return !getInitialValue(); }
+ bool isDeclaration() {
+ return !getInitialValue() && getCtorRegion().empty() && getDtorRegion().empty();
+ }
bool hasInitializer() { return !isDeclaration(); }
}];
let skipDefaultBuilders = 1;
- let builders = [OpBuilder<(ins
- "llvm::StringRef":$sym_name,
- "mlir::Type":$sym_type,
- CArg<"bool", "false">:$isConstant,
- // CIR defaults to external linkage.
- CArg<"cir::GlobalLinkageKind",
- "cir::GlobalLinkageKind::ExternalLinkage">:$linkage)>];
+ let builders = [
+ OpBuilder<(ins
+ "llvm::StringRef":$sym_name,
+ "mlir::Type":$sym_type,
+ CArg<"bool", "false">:$isConstant,
+ // CIR defaults to external linkage.
+ CArg<"cir::GlobalLinkageKind",
+ "cir::GlobalLinkageKind::ExternalLinkage">:$linkage,
+ CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>",
+ "nullptr">:$ctorBuilder,
+ CArg<"llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>",
+ "nullptr">:$dtorBuilder)
+ >
+ ];
let hasVerifier = 1;
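For orientation, a sketch of how the new builder overload could be invoked from CodeGen to create a global with a dynamic initializer (the call site below is hypothetical; wiring CIRGen to use it is not part of this diff):

    // Hypothetical use of the new ctor/dtor builder callbacks.
    cir::GlobalOp g = builder.create<cir::GlobalOp>(
        loc, /*sym_name=*/"needsCtor", /*sym_type=*/recordTy,
        /*isConstant=*/false, cir::GlobalLinkageKind::ExternalLinkage,
        /*ctorBuilder=*/[&](mlir::OpBuilder &b, mlir::Location l) {
          // Emit the dynamic-initialization body here; the custom
          // printer/parser below treats the region's terminator as implicit.
        },
        /*dtorBuilder=*/nullptr); // no destructor region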
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index cb8fe6c..9d12a13 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -951,28 +951,37 @@ Address CIRGenFunction::getAddressOfBaseClass(
bool nullCheckValue, SourceLocation loc) {
assert(!path.empty() && "Base path should not be empty!");
+ CastExpr::path_const_iterator start = path.begin();
+ const CXXRecordDecl *vBase = nullptr;
+
if ((*path.begin())->isVirtual()) {
- // The implementation here is actually complete, but let's flag this
- // as an error until the rest of the virtual base class support is in place.
- cgm.errorNYI(loc, "getAddrOfBaseClass: virtual base");
- return Address::invalid();
+ vBase = (*start)->getType()->castAsCXXRecordDecl();
+ ++start;
}
// Compute the static offset of the ultimate destination within its
// allocating subobject (the virtual base, if there is one, or else
// the "complete" object that we see).
- CharUnits nonVirtualOffset =
- cgm.computeNonVirtualBaseClassOffset(derived, path);
+ CharUnits nonVirtualOffset = cgm.computeNonVirtualBaseClassOffset(
+ vBase ? vBase : derived, {start, path.end()});
+
+ // If there's a virtual step, we can sometimes "devirtualize" it.
+ // For now, that's limited to when the derived type is final.
+ // TODO: "devirtualize" this for accesses to known-complete objects.
+ if (vBase && derived->hasAttr<FinalAttr>()) {
+ const ASTRecordLayout &layout = getContext().getASTRecordLayout(derived);
+ CharUnits vBaseOffset = layout.getVBaseClassOffset(vBase);
+ nonVirtualOffset += vBaseOffset;
+ vBase = nullptr; // we no longer have a virtual step
+ }
// Get the base pointer type.
mlir::Type baseValueTy = convertType((path.end()[-1])->getType());
assert(!cir::MissingFeatures::addressSpace());
- // The if statement here is redundant now, but it will be needed when we add
- // support for virtual base classes.
// If there is no virtual base, use cir.base_class_addr. It takes care of
// the adjustment and the null pointer check.
- if (nonVirtualOffset.isZero()) {
+ if (nonVirtualOffset.isZero() && !vBase) {
assert(!cir::MissingFeatures::sanitizers());
return builder.createBaseClassAddr(getLoc(loc), value, baseValueTy, 0,
/*assumeNotNull=*/true);
@@ -980,10 +989,17 @@ Address CIRGenFunction::getAddressOfBaseClass(
assert(!cir::MissingFeatures::sanitizers());
- // Apply the offset
- value = builder.createBaseClassAddr(getLoc(loc), value, baseValueTy,
- nonVirtualOffset.getQuantity(),
- /*assumeNotNull=*/true);
+ // Compute the virtual offset.
+ mlir::Value virtualOffset = nullptr;
+ if (vBase) {
+ virtualOffset = cgm.getCXXABI().getVirtualBaseClassOffset(
+ getLoc(loc), *this, value, derived, vBase);
+ }
+
+ // Apply both offsets.
+ value = applyNonVirtualAndVirtualOffset(
+ getLoc(loc), *this, value, nonVirtualOffset, virtualOffset, derived,
+ vBase, baseValueTy, not nullCheckValue);
// Cast to the destination type.
value = value.withElementType(builder, baseValueTy);
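The new "devirtualize when final" path corresponds to source like the vbase.cpp test further down: for a final class the complete-object layout is known, so the virtual-base offset comes from the record layout as a constant instead of being loaded through the vtable. A minimal illustration using the class shapes from that test:

    class Base { public: virtual void f(); };
    class Derived : public virtual Base {};            // offset read from vtable
    class DerivedFinal final : public virtual Base {}; // offset is a constant

    void use(Derived &d, DerivedFinal &df) {
      d.f();  // load the vbase offset from d's vtable, adjust this, then call
      df.f(); // this-adjustment folded at compile time (offset 0 here)
    }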
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index b93d9a9..f4bbced 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -676,6 +676,10 @@ public:
mlir::Value VisitRealImag(const UnaryOperator *e,
QualType promotionType = QualType());
+ mlir::Value VisitUnaryExtension(const UnaryOperator *e) {
+ return Visit(e->getSubExpr());
+ }
+
mlir::Value VisitCXXDefaultInitExpr(CXXDefaultInitExpr *die) {
CIRGenFunction::CXXDefaultInitExprScope scope(cgf, die);
return Visit(die->getExpr());
@@ -1278,9 +1282,6 @@ mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e,
} else if (const auto *uo = dyn_cast<UnaryOperator>(e)) {
switch (uo->getOpcode()) {
case UO_Imag:
- cgf.cgm.errorNYI(e->getSourceRange(),
- "ScalarExprEmitter::emitPromoted unary imag");
- return {};
case UO_Real:
return VisitRealImag(uo, promotionType);
case UO_Minus:
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index e842892..644c383 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -216,6 +216,7 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s,
case Stmt::OMPSimdDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPUnrollDirectiveClass:
+ case Stmt::OMPFuseDirectiveClass:
case Stmt::OMPForDirectiveClass:
case Stmt::OMPForSimdDirectiveClass:
case Stmt::OMPSectionsDirectiveClass:
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 58ef500..fb87036 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1355,9 +1355,11 @@ mlir::LogicalResult cir::GlobalOp::verify() {
return success();
}
-void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState,
- llvm::StringRef sym_name, mlir::Type sym_type,
- bool isConstant, cir::GlobalLinkageKind linkage) {
+void cir::GlobalOp::build(
+ OpBuilder &odsBuilder, OperationState &odsState, llvm::StringRef sym_name,
+ mlir::Type sym_type, bool isConstant, cir::GlobalLinkageKind linkage,
+ function_ref<void(OpBuilder &, Location)> ctorBuilder,
+ function_ref<void(OpBuilder &, Location)> dtorBuilder) {
odsState.addAttribute(getSymNameAttrName(odsState.name),
odsBuilder.getStringAttr(sym_name));
odsState.addAttribute(getSymTypeAttrName(odsState.name),
@@ -1370,26 +1372,88 @@ void cir::GlobalOp::build(OpBuilder &odsBuilder, OperationState &odsState,
cir::GlobalLinkageKindAttr::get(odsBuilder.getContext(), linkage);
odsState.addAttribute(getLinkageAttrName(odsState.name), linkageAttr);
+ Region *ctorRegion = odsState.addRegion();
+ if (ctorBuilder) {
+ odsBuilder.createBlock(ctorRegion);
+ ctorBuilder(odsBuilder, odsState.location);
+ }
+
+ Region *dtorRegion = odsState.addRegion();
+ if (dtorBuilder) {
+ odsBuilder.createBlock(dtorRegion);
+ dtorBuilder(odsBuilder, odsState.location);
+ }
+
odsState.addAttribute(getGlobalVisibilityAttrName(odsState.name),
cir::VisibilityAttr::get(odsBuilder.getContext()));
}
+/// Given a point in the region branch graph (`point`), return in `regions`
+/// the successor regions that may be selected during the flow of control.
+/// When `point` is the parent operation, the successors are the non-empty
+/// ctor/dtor regions; from inside either region, control returns to the
+/// parent operation.
+void cir::GlobalOp::getSuccessorRegions(
+ mlir::RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) {
+ // The `ctor` and `dtor` regions always branch back to the parent operation.
+ if (!point.isParent()) {
+ regions.push_back(RegionSuccessor());
+ return;
+ }
+
+ // Don't consider the ctor region if it is empty.
+ Region *ctorRegion = &this->getCtorRegion();
+ if (ctorRegion->empty())
+ ctorRegion = nullptr;
+
+ // Don't consider the dtor region if it is empty.
+  Region *dtorRegion = &this->getDtorRegion();
+ if (dtorRegion->empty())
+ dtorRegion = nullptr;
+
+ // If the condition isn't constant, both regions may be executed.
+ if (ctorRegion)
+ regions.push_back(RegionSuccessor(ctorRegion));
+ if (dtorRegion)
+ regions.push_back(RegionSuccessor(dtorRegion));
+}
+
static void printGlobalOpTypeAndInitialValue(OpAsmPrinter &p, cir::GlobalOp op,
- TypeAttr type,
- Attribute initAttr) {
+ TypeAttr type, Attribute initAttr,
+ mlir::Region &ctorRegion,
+ mlir::Region &dtorRegion) {
+ auto printType = [&]() { p << ": " << type; };
if (!op.isDeclaration()) {
p << "= ";
- // This also prints the type...
- if (initAttr)
- printConstant(p, initAttr);
+ if (!ctorRegion.empty()) {
+ p << "ctor ";
+ printType();
+ p << " ";
+ p.printRegion(ctorRegion,
+ /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/false);
+ } else {
+ // This also prints the type...
+ if (initAttr)
+ printConstant(p, initAttr);
+ }
+
+ if (!dtorRegion.empty()) {
+ p << " dtor ";
+ p.printRegion(dtorRegion,
+ /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/false);
+ }
} else {
- p << ": " << type;
+ printType();
}
}
-static ParseResult
-parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr,
- Attribute &initialValueAttr) {
+static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser,
+ TypeAttr &typeAttr,
+ Attribute &initialValueAttr,
+ mlir::Region &ctorRegion,
+ mlir::Region &dtorRegion) {
mlir::Type opTy;
if (parser.parseOptionalEqual().failed()) {
// Absence of equal means a declaration, so we need to parse the type.
@@ -1397,16 +1461,38 @@ parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr,
if (parser.parseColonType(opTy))
return failure();
} else {
- // Parse constant with initializer, examples:
- // cir.global @y = #cir.fp<1.250000e+00> : !cir.double
- // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>>
- if (parseConstantValue(parser, initialValueAttr).failed())
- return failure();
+    // Parse constructor, example:
+ // cir.global @rgb = ctor : type { ... }
+ if (!parser.parseOptionalKeyword("ctor")) {
+ if (parser.parseColonType(opTy))
+ return failure();
+ auto parseLoc = parser.getCurrentLocation();
+ if (parser.parseRegion(ctorRegion, /*arguments=*/{}, /*argTypes=*/{}))
+ return failure();
+ if (ensureRegionTerm(parser, ctorRegion, parseLoc).failed())
+ return failure();
+ } else {
+ // Parse constant with initializer, examples:
+ // cir.global @y = 3.400000e+00 : f32
+ // cir.global @rgb = #cir.const_array<[...] : !cir.array<i8 x 3>>
+ if (parseConstantValue(parser, initialValueAttr).failed())
+ return failure();
+
+ assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) &&
+ "Non-typed attrs shouldn't appear here.");
+ auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr);
+ opTy = typedAttr.getType();
+ }
- assert(mlir::isa<mlir::TypedAttr>(initialValueAttr) &&
- "Non-typed attrs shouldn't appear here.");
- auto typedAttr = mlir::cast<mlir::TypedAttr>(initialValueAttr);
- opTy = typedAttr.getType();
+ // Parse destructor, example:
+ // dtor { ... }
+ if (!parser.parseOptionalKeyword("dtor")) {
+ auto parseLoc = parser.getCurrentLocation();
+ if (parser.parseRegion(dtorRegion, /*arguments=*/{}, /*argTypes=*/{}))
+ return failure();
+ if (ensureRegionTerm(parser, dtorRegion, parseLoc).failed())
+ return failure();
+ }
}
typeAttr = TypeAttr::get(opTy);
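Two notes on the parser above. First, mlir::ParseResult converts to bool as true on failure, so `if (!parser.parseOptionalKeyword("ctor"))` reads as "if the optional ctor keyword was consumed". Second, the accepted forms, reconstructed from this parser and the round-trip test added below (clang/test/CIR/IR/global-init.cir), are:

    cir.global @sym : !ty                                // declaration
    cir.global @sym = <constant> : !ty                   // initializer
    cir.global @sym = <constant> : !ty dtor { ... }      // initializer + dtor
    cir.global @sym = ctor : !ty { ... }                 // ctor region
    cir.global @sym = ctor : !ty { ... } dtor { ... }    // ctor + dtor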
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 57db20f7..64f1917 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1090,8 +1090,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (std::optional<GCOVOptions> Options =
getGCOVOptions(CodeGenOpts, LangOpts))
PB.registerPipelineStartEPCallback(
- [Options](ModulePassManager &MPM, OptimizationLevel Level) {
- MPM.addPass(GCOVProfilerPass(*Options));
+ [this, Options](ModulePassManager &MPM, OptimizationLevel Level) {
+ MPM.addPass(
+ GCOVProfilerPass(*Options, CI.getVirtualFileSystemPtr()));
});
if (std::optional<InstrProfOptions> Options =
getInstrProfOptions(CodeGenOpts, LangOpts))
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 07cf08c..6596ec0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -192,9 +192,17 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
return CGF.Builder.CreateCall(F, {Src0, Src1});
}
+static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
+ if (AMDGCNScope == "agent")
+ return "device";
+ if (AMDGCNScope == "wavefront")
+ return "subgroup";
+ return AMDGCNScope;
+}
+
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
-// \p Order takes a C++11 comptabile memory-ordering specifier and converts
+// \p Order takes a C++11 compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
// specific SyncScopeID and writes it to \p SSID.
@@ -227,6 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
// Some of the atomic builtins take the scope as a string name.
StringRef scp;
if (llvm::getConstantStringInfo(Scope, scp)) {
+ if (getTarget().getTriple().isSPIRV())
+ scp = mapScopeToSPIRV(scp);
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
return;
}
@@ -238,13 +248,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = llvm::SyncScope::System;
break;
case 1: // __MEMORY_SCOPE_DEVICE
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
break;
case 2: // __MEMORY_SCOPE_WRKGRP
SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
break;
case 3: // __MEMORY_SCOPE_WVFRNT
- SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
break;
case 4: // __MEMORY_SCOPE_SINGLE
SSID = llvm::SyncScope::SingleThread;
@@ -1510,7 +1526,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
//
// The global/flat cases need to use agent scope to consistently produce
// the native instruction instead of a cmpxchg expansion.
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
AO = AtomicOrdering::Monotonic;
// The v2bf16 builtin uses i16 instead of a natural bfloat type.
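The net effect of the mapping: the same HIP source now produces a target-appropriate syncscope per triple. A sketch using __builtin_amdgcn_fence (exercised by the updated builtin-amdgcn-fence.cpp test listed in the diffstat):

    // HIP device code, compiled once per target:
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "wavefront");

    // amdgcn-amd-amdhsa:  fence syncscope("agent") seq_cst
    //                     fence syncscope("wavefront") seq_cst
    // spirv64-amd-amdhsa: fence syncscope("device") seq_cst
    //                     fence syncscope("subgroup") seq_cst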
diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp
index 8605ba2..a2c6957 100644
--- a/clang/lib/Parse/ParseExprCXX.cpp
+++ b/clang/lib/Parse/ParseExprCXX.cpp
@@ -1299,7 +1299,7 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer(
Diag(Tok, getLangOpts().CPlusPlus23
? diag::warn_cxx20_compat_decl_attrs_on_lambda
: diag::ext_decl_attrs_on_lambda)
- << Tok.getIdentifierInfo() << Tok.isRegularKeywordAttribute();
+ << Tok.isRegularKeywordAttribute() << Tok.getIdentifierInfo();
MaybeParseCXX11Attributes(D);
}
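The DiagnosticParseKinds.td and ParseExprCXX.cpp hunks are one fix: in a clang diagnostic, %select{...}N chooses an alternative based on argument N, and %M inside an alternative prints argument M. The format string now selects on argument 0 and prints argument 1, and the call site passes the arguments in that order:

    // "%select{an attribute specifier sequence|%1}0 in this position is a C++23 extension"
    Diag(Tok, diag::ext_decl_attrs_on_lambda)
        << Tok.isRegularKeywordAttribute()  // argument 0: selects the alternative
        << Tok.getIdentifierInfo();         // argument 1: keyword name, printed by %1

The earlier mismatch between the format string and the argument order is the crash recorded in the release-note entry above (GH161070).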
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 3b267c1..3302bfc 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -20108,8 +20108,9 @@ static void DoMarkVarDeclReferenced(
bool NeededForConstantEvaluation =
isPotentiallyConstantEvaluatedContext(SemaRef) && UsableInConstantExpr;
- bool NeedDefinition =
- OdrUse == OdrUseContext::Used || NeededForConstantEvaluation;
+ bool NeedDefinition = OdrUse == OdrUseContext::Used ||
+ NeededForConstantEvaluation ||
+ Var->getType()->isUndeducedType();
assert(!isa<VarTemplatePartialSpecializationDecl>(Var) &&
"Can't instantiate a partial template specialization.");
diff --git a/clang/test/CIR/CodeGen/complex.cpp b/clang/test/CIR/CodeGen/complex.cpp
index e901631..4c396d3 100644
--- a/clang/test/CIR/CodeGen/complex.cpp
+++ b/clang/test/CIR/CodeGen/complex.cpp
@@ -1270,3 +1270,40 @@ void real_on_scalar_from_real_with_type_promotion() {
// OGCG: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
// OGCG: %[[A_REAL_F16:.*]] = fptrunc float %[[A_REAL_F32]] to half
// OGCG: store half %[[A_REAL_F16]], ptr %[[B_ADDR]], align 2
+
+void real_on_scalar_from_imag_with_type_promotion() {
+ _Float16 _Complex a;
+ _Float16 b = __real__(__imag__ a);
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.f16>, !cir.ptr<!cir.complex<!cir.f16>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.f16, !cir.ptr<!cir.f16>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.f16>>, !cir.complex<!cir.f16>
+// CIR: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.f16> -> !cir.f16
+// CIR: %[[A_REAL_F32:.*]] = cir.cast(floating, %[[A_REAL]] : !cir.f16), !cir.float
+// CIR: %[[A_IMAG_F32:.*]] = cir.cast(floating, %[[A_IMAG]] : !cir.f16), !cir.float
+// CIR: %[[A_COMPLEX_F32:.*]] = cir.complex.create %[[A_REAL_F32]], %[[A_IMAG_F32]] : !cir.float -> !cir.complex<!cir.float>
+// CIR: %[[A_IMAG_F32:.*]] = cir.complex.imag %[[A_COMPLEX_F32]] : !cir.complex<!cir.float> -> !cir.float
+// CIR: %[[A_IMAG_F16:.*]] = cir.cast(floating, %[[A_IMAG_F32]] : !cir.float), !cir.f16
+// CIR: cir.store{{.*}} %[[A_IMAG_F16]], %[[B_ADDR]] : !cir.f16, !cir.ptr<!cir.f16>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { half, half }, i64 1, align 2
+// LLVM: %[[B_ADDR]] = alloca half, i64 1, align 2
+// LLVM: %[[TMP_A:.*]] = load { half, half }, ptr %[[A_ADDR]], align 2
+// LLVM: %[[A_REAL:.*]] = extractvalue { half, half } %[[TMP_A]], 0
+// LLVM: %[[A_IMAG:.*]] = extractvalue { half, half } %[[TMP_A]], 1
+// LLVM: %[[A_REAL_F32:.*]] = fpext half %[[A_REAL]] to float
+// LLVM: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// LLVM: %[[TMP_A_COMPLEX_F32:.*]] = insertvalue { float, float } {{.*}}, float %[[A_REAL_F32]], 0
+// LLVM: %[[A_COMPLEX_F32:.*]] = insertvalue { float, float } %[[TMP_A_COMPLEX_F32]], float %[[A_IMAG_F32]], 1
+// LLVM: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// LLVM: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
+
+// OGCG: %[[A_ADDR:.*]] = alloca { half, half }, align 2
+// OGCG: %[[B_ADDR:.*]] = alloca half, align 2
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { half, half }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load half, ptr %[[A_IMAG_PTR]], align 2
+// OGCG: %[[A_IMAG_F32:.*]] = fpext half %[[A_IMAG]] to float
+// OGCG: %[[A_IMAG_F16:.*]] = fptrunc float %[[A_IMAG_F32]] to half
+// OGCG: store half %[[A_IMAG_F16]], ptr %[[B_ADDR]], align 2
diff --git a/clang/test/CIR/CodeGen/vbase.cpp b/clang/test/CIR/CodeGen/vbase.cpp
index 9139651..4d57f8e 100644
--- a/clang/test/CIR/CodeGen/vbase.cpp
+++ b/clang/test/CIR/CodeGen/vbase.cpp
@@ -13,19 +13,29 @@ public:
class Derived : public virtual Base {};
-// This is just here to force the record types to be emitted.
void f() {
Derived d;
+ d.f();
+}
+
+class DerivedFinal final : public virtual Base {};
+
+void g() {
+ DerivedFinal df;
+ df.f();
}
// CIR: !rec_Base = !cir.record<class "Base" {!cir.vptr}>
// CIR: !rec_Derived = !cir.record<class "Derived" {!rec_Base}>
+// CIR: !rec_DerivedFinal = !cir.record<class "DerivedFinal" {!rec_Base}>
// LLVM: %class.Derived = type { %class.Base }
// LLVM: %class.Base = type { ptr }
+// LLVM: %class.DerivedFinal = type { %class.Base }
// OGCG: %class.Derived = type { %class.Base }
// OGCG: %class.Base = type { ptr }
+// OGCG: %class.DerivedFinal = type { %class.Base }
// Test the constructor handling for a class with a virtual base.
struct A {
@@ -47,6 +57,76 @@ void ppp() { B b; }
// OGCG: @_ZTV1B = linkonce_odr unnamed_addr constant { [3 x ptr] } { [3 x ptr] [ptr inttoptr (i64 12 to ptr), ptr null, ptr @_ZTI1B] }, comdat, align 8
+// CIR: cir.func {{.*}}@_Z1fv() {
+// CIR: %[[D:.+]] = cir.alloca !rec_Derived, !cir.ptr<!rec_Derived>, ["d", init]
+// CIR: cir.call @_ZN7DerivedC1Ev(%[[D]]) nothrow : (!cir.ptr<!rec_Derived>) -> ()
+// CIR: %[[VPTR_PTR:.+]] = cir.vtable.get_vptr %[[D]] : !cir.ptr<!rec_Derived> -> !cir.ptr<!cir.vptr>
+// CIR: %[[VPTR:.+]] = cir.load {{.*}} %[[VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[VPTR_I8:.+]] = cir.cast(bitcast, %[[VPTR]] : !cir.vptr), !cir.ptr<!u8i>
+// CIR: %[[NEG32:.+]] = cir.const #cir.int<-32> : !s64i
+// CIR: %[[ADJ_VPTR_I8:.+]] = cir.ptr_stride(%[[VPTR_I8]] : !cir.ptr<!u8i>, %[[NEG32]] : !s64i), !cir.ptr<!u8i>
+// CIR: %[[OFFSET_PTR:.+]] = cir.cast(bitcast, %[[ADJ_VPTR_I8]] : !cir.ptr<!u8i>), !cir.ptr<!s64i>
+// CIR: %[[OFFSET:.+]] = cir.load {{.*}} %[[OFFSET_PTR]] : !cir.ptr<!s64i>, !s64i
+// CIR: %[[D_I8:.+]] = cir.cast(bitcast, %[[D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!u8i>
+// CIR: %[[ADJ_THIS_I8:.+]] = cir.ptr_stride(%[[D_I8]] : !cir.ptr<!u8i>, %[[OFFSET]] : !s64i), !cir.ptr<!u8i>
+// CIR: %[[ADJ_THIS_D:.+]] = cir.cast(bitcast, %[[ADJ_THIS_I8]] : !cir.ptr<!u8i>), !cir.ptr<!rec_Derived>
+// CIR: %[[BASE_THIS:.+]] = cir.cast(bitcast, %[[ADJ_THIS_D]] : !cir.ptr<!rec_Derived>), !cir.ptr<!rec_Base>
+// CIR: %[[BASE_VPTR_PTR:.+]] = cir.vtable.get_vptr %[[BASE_THIS]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// CIR: %[[BASE_VPTR:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[SLOT_PTR:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>
+// CIR: %[[FN:.+]] = cir.load {{.*}} %[[SLOT_PTR]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>
+// CIR: cir.call %[[FN]](%[[BASE_THIS]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> ()
+// CIR: cir.return
+
+// CIR: cir.func {{.*}}@_Z1gv() {
+// CIR: %[[DF:.+]] = cir.alloca !rec_DerivedFinal, !cir.ptr<!rec_DerivedFinal>, ["df", init]
+// CIR: cir.call @_ZN12DerivedFinalC1Ev(%[[DF]]) nothrow : (!cir.ptr<!rec_DerivedFinal>) -> ()
+// CIR: %[[BASE_THIS_2:.+]] = cir.base_class_addr %[[DF]] : !cir.ptr<!rec_DerivedFinal> nonnull [0] -> !cir.ptr<!rec_Base>
+// CIR: %[[BASE_VPTR_PTR_2:.+]] = cir.vtable.get_vptr %[[BASE_THIS_2]] : !cir.ptr<!rec_Base> -> !cir.ptr<!cir.vptr>
+// CIR: %[[BASE_VPTR_2:.+]] = cir.load {{.*}} %[[BASE_VPTR_PTR_2]] : !cir.ptr<!cir.vptr>, !cir.vptr
+// CIR: %[[SLOT_PTR_2:.+]] = cir.vtable.get_virtual_fn_addr %[[BASE_VPTR_2]][0] : !cir.vptr -> !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>
+// CIR: %[[FN_2:.+]] = cir.load {{.*}} %[[SLOT_PTR_2]] : !cir.ptr<!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>>, !cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>
+// CIR: cir.call %[[FN_2]](%[[BASE_THIS_2]]) : (!cir.ptr<!cir.func<(!cir.ptr<!rec_Base>)>>, !cir.ptr<!rec_Base>) -> ()
+// CIR: cir.return
+
+// LLVM: define {{.*}}void @_Z1fv()
+// LLVM: %[[D:.+]] = alloca {{.*}}
+// LLVM: call void @_ZN7DerivedC1Ev(ptr %[[D]])
+// LLVM: %[[VPTR_ADDR:.+]] = load ptr, ptr %[[D]]
+// LLVM: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VPTR_ADDR]], i64 -32
+// LLVM: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]]
+// LLVM: %[[ADJ_THIS:.+]] = getelementptr i8, ptr %[[D]], i64 %[[OFF]]
+// LLVM: %[[VFN_TAB:.+]] = load ptr, ptr %[[ADJ_THIS]]
+// LLVM: %[[SLOT0:.+]] = getelementptr inbounds ptr, ptr %[[VFN_TAB]], i32 0
+// LLVM: %[[VFN:.+]] = load ptr, ptr %[[SLOT0]]
+// LLVM: call void %[[VFN]](ptr %[[ADJ_THIS]])
+// LLVM: ret void
+
+// LLVM: define {{.*}}void @_Z1gv()
+// LLVM: %[[DF:.+]] = alloca {{.*}}
+// LLVM: call void @_ZN12DerivedFinalC1Ev(ptr %[[DF]])
+// LLVM: %[[VPTR2:.+]] = load ptr, ptr %[[DF]]
+// LLVM: %[[SLOT0_2:.+]] = getelementptr inbounds ptr, ptr %[[VPTR2]], i32 0
+// LLVM: %[[VFN2:.+]] = load ptr, ptr %[[SLOT0_2]]
+// LLVM: call void %[[VFN2]](ptr %[[DF]])
+// LLVM: ret void
+
+// OGCG: define {{.*}}void @_Z1fv()
+// OGCG: %[[D:.+]] = alloca {{.*}}
+// OGCG: call void @_ZN7DerivedC1Ev(ptr {{.*}} %[[D]])
+// OGCG: %[[VTABLE:.+]] = load ptr, ptr %[[D]]
+// OGCG: %[[NEG32_PTR:.+]] = getelementptr i8, ptr %[[VTABLE]], i64 -32
+// OGCG: %[[OFF:.+]] = load i64, ptr %[[NEG32_PTR]]
+// OGCG: %[[ADJ_THIS:.+]] = getelementptr inbounds i8, ptr %[[D]], i64 %[[OFF]]
+// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[ADJ_THIS]])
+// OGCG: ret void
+
+// OGCG: define {{.*}}void @_Z1gv()
+// OGCG: %[[DF:.+]] = alloca {{.*}}
+// OGCG: call void @_ZN12DerivedFinalC1Ev(ptr {{.*}} %[[DF]])
+// OGCG: call void @_ZN4Base1fEv(ptr {{.*}} %[[DF]])
+// OGCG: ret void
+
// Constructor for B
// CIR: cir.func comdat linkonce_odr @_ZN1BC1Ev(%arg0: !cir.ptr<!rec_B>
// CIR: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init]
diff --git a/clang/test/CIR/CodeGen/vector-ext.cpp b/clang/test/CIR/CodeGen/vector-ext.cpp
index 8b5379a..8bca48d 100644
--- a/clang/test/CIR/CodeGen/vector-ext.cpp
+++ b/clang/test/CIR/CodeGen/vector-ext.cpp
@@ -1322,3 +1322,23 @@ void logical_not() {
// OGCG: %[[RESULT:.*]] = icmp eq <4 x i32> %[[TMP_A]], zeroinitializer
// OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32>
// OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16
+
+void unary_extension() {
+ vi4 a;
+ vi4 b = __extension__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
diff --git a/clang/test/CIR/CodeGen/vector.cpp b/clang/test/CIR/CodeGen/vector.cpp
index d8fdeea..f242779 100644
--- a/clang/test/CIR/CodeGen/vector.cpp
+++ b/clang/test/CIR/CodeGen/vector.cpp
@@ -1390,3 +1390,23 @@ void logical_not_float() {
// OGCG: %[[RESULT:.*]] = fcmp oeq <4 x float> %[[TMP_A]], zeroinitializer
// OGCG: %[[RESULT_VI4:.*]] = sext <4 x i1> %[[RESULT]] to <4 x i32>
// OGCG: store <4 x i32> %[[RESULT_VI4]], ptr %[[B_ADDR]], align 16
+
+void unary_extension() {
+ vi4 a;
+ vi4 b = __extension__ a;
+}
+
+// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"]
+// CIR: %[[B_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["b", init]
+// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
+// CIR: cir.store{{.*}} %[[TMP_A]], %[[B_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[B_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16
+// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
+
+// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[B_ADDR:.*]] = alloca <4 x i32>, align 16
+// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16
+// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[B_ADDR]], align 16
diff --git a/clang/test/CIR/IR/global-init.cir b/clang/test/CIR/IR/global-init.cir
new file mode 100644
index 0000000..727c067
--- /dev/null
+++ b/clang/test/CIR/IR/global-init.cir
@@ -0,0 +1,48 @@
+// RUN: cir-opt --verify-roundtrip %s -o - | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+
+!rec_NeedsCtor = !cir.record<struct "NeedsCtor" padded {!u8i}>
+!rec_NeedsDtor = !cir.record<struct "NeedsDtor" padded {!u8i}>
+!rec_NeedsCtorDtor = !cir.record<struct "NeedsCtorDtor" padded {!u8i}>
+
+module attributes {cir.triple = "x86_64-unknown-linux-gnu"} {
+ cir.func private @_ZN9NeedsCtorC1Ev(!cir.ptr<!rec_NeedsCtor>)
+ cir.global external @needsCtor = ctor : !rec_NeedsCtor {
+ %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+ cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+ }
+ // CHECK: cir.global external @needsCtor = ctor : !rec_NeedsCtor {
+ // CHECK: %0 = cir.get_global @needsCtor : !cir.ptr<!rec_NeedsCtor>
+ // CHECK: cir.call @_ZN9NeedsCtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtor>) -> ()
+ // CHECK: }
+
+ cir.func private @_ZN9NeedsDtorD1Ev(!cir.ptr<!rec_NeedsDtor>)
+ cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor {
+ %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor>
+ cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> ()
+ }
+ // CHECK: cir.global external dso_local @needsDtor = #cir.zero : !rec_NeedsDtor dtor {
+ // CHECK: %0 = cir.get_global @needsDtor : !cir.ptr<!rec_NeedsDtor>
+ // CHECK: cir.call @_ZN9NeedsDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsDtor>) -> ()
+ // CHECK: }
+
+ cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor {
+ %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ } dtor {
+ %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ }
+ // CHECK: cir.func private @_ZN13NeedsCtorDtorC1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ // CHECK: cir.func private @_ZN13NeedsCtorDtorD1Ev(!cir.ptr<!rec_NeedsCtorDtor>)
+ // CHECK: cir.global external dso_local @needsCtorDtor = ctor : !rec_NeedsCtorDtor {
+ // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ // CHECK: cir.call @_ZN13NeedsCtorDtorC1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ // CHECK: } dtor {
+ // CHECK: %0 = cir.get_global @needsCtorDtor : !cir.ptr<!rec_NeedsCtorDtor>
+ // CHECK: cir.call @_ZN13NeedsCtorDtorD1Ev(%0) : (!cir.ptr<!rec_NeedsCtorDtor>) -> ()
+ // CHECK: }
+}
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
index 5920ced..137a49b 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj(
// CHECK-NEXT: entry:
@@ -21,6 +24,43 @@
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter64Py(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter32PVj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter64PVy(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() {
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global32() {
global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
@@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global64() {
global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
@@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant32() {
__UINT32_TYPE__ local_val;
@@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant64() {
__UINT64_TYPE__ local_val;
@@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope64() {
__attribute__((shared)) __UINT64_TYPE__ val;
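
The checks above are mechanical: the pre-existing assertions for the amdgcn-amd-amdhsa target are duplicated under a GCN prefix, and a parallel AMDGCNSPIRV set is added for spirv64-amd-amdhsa. Per these assertions, both targets lower __builtin_amdgcn_atomic_inc32/64 and __builtin_amdgcn_atomic_dec32/64 to atomicrmw uinc_wrap/udec_wrap carrying !amdgpu.no.fine.grained.memory; the deltas are the generic-pointer spelling (plain ptr vs. ptr addrspace(4)), the metadata slot ([[META4]] vs. [[META5]]), and the HSA sync scopes "agent" and "wavefront" being rendered as "device" and "subgroup" on the SPIR-V flavored target. A minimal sketch of the source pattern under test, with the builtin signature inferred from the calls in this file (illustrative, not part of the test):

__attribute__((device)) void inc_dec_sketch(__UINT32_TYPE__ *p) {
  // Expected lowering: atomicrmw uinc_wrap ... syncscope("workgroup") seq_cst
  *p = __builtin_amdgcn_atomic_inc32(p, *p, __ATOMIC_SEQ_CST, "workgroup");
  // Expected lowering: atomicrmw udec_wrap ... syncscope("agent") on GCN,
  // syncscope("device") under AMDGCNSPIRV, per the checks above.
  *p = __builtin_amdgcn_atomic_dec32(p, *p, __ATOMIC_SEQ_CST, "agent");
}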
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index 1e977dd..dd1ca45 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -12,6 +15,25 @@
// CHECK-NEXT: fence syncscope("agent") acq_rel
// CHECK-NEXT: fence syncscope("workgroup") release
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv(
+// GCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst
+// GCN-NEXT: fence syncscope("agent") acquire
+// GCN-NEXT: fence seq_cst
+// GCN-NEXT: fence syncscope("agent") acq_rel
+// GCN-NEXT: fence syncscope("workgroup") release
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire
+// AMDGCNSPIRV-NEXT: fence seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release
+// AMDGCNSPIRV-NEXT: ret void
//
void test_memory_fence_success() {
@@ -35,6 +57,25 @@ void test_memory_fence_success() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_localv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_local() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -58,6 +99,25 @@ void test_local() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z11test_globalv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]]
+// GCN-NEXT: fence seq_cst, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_global() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
@@ -80,6 +140,25 @@ void test_global() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_imagev(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_image() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -99,13 +178,33 @@ void test_image() {
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_mixedv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_mixedv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_mixed() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global");
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local");
}
-//.
// CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
// CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
// CHECK: [[META5]] = !{[[META4]], [[META3]]}
//.
+// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// GCN: [[META5]] = !{[[META4]], [[META3]]}
+//.
+// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]}
+//.
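
As with the atomics test, the fence checks split per target. __builtin_amdgcn_fence takes an atomic ordering, a sync-scope string, and optionally one or more address-space strings; the checks show the ordering and scope landing directly on the fence instruction while the address-space strings become "amdgpu-synchronize-as" !mmra metadata, with "agent" again mapped to "device" under AMDGCNSPIRV. A hedged sketch of the call forms pinned down above, with arguments copied from the test bodies:

void fence_sketch() {
  // fence syncscope("workgroup") seq_cst   (no !mmra)
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
  // fence ... !mmra !{"amdgpu-synchronize-as", "local"}   ([[META3]])
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local");
  // Multiple or duplicate address-space strings collapse into one combined
  // node ([[META5]] = {global, local}), as test_mixed demonstrates.
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup", "local", "global");
}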
diff --git a/clang/test/CodeGenCXX/gh56652.cpp b/clang/test/CodeGenCXX/gh56652.cpp
new file mode 100644
index 0000000..06a496e
--- /dev/null
+++ b/clang/test/CodeGenCXX/gh56652.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-elf-gnu %s -emit-llvm -o - | FileCheck %s
+
+namespace GH56652{
+
+struct foo {};
+
+template <typename T> struct bar {
+ using type = T;
+
+ template <foo> inline static constexpr auto b = true;
+};
+
+template <typename T>
+concept C = requires(T a) { T::template b<foo{}>; };
+
+template <typename T> auto fn(T) {
+ if constexpr (!C<T>)
+ return foo{};
+ else
+ return T{};
+}
+
+auto a = decltype(fn(bar<int>{})){};
+
+}
+
+namespace GH116319 {
+
+template <int = 0> struct a {
+template <class> static constexpr auto b = 2;
+template <class> static void c() noexcept(noexcept(b<int>)) {}
+};
+
+void test() { a<>::c<int>(); }
+
+
+}
+
+// CHECK: %"struct.GH56652::bar" = type { i8 }
+// CHECK: $_ZN8GH1163191aILi0EE1cIiEEvv = comdat any
+// CHECK: @_ZN7GH566521aE = global %"struct.GH56652::bar" undef
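
The new file is a C++20 codegen regression test for the two GitHub issues it names: decltype(fn(bar<int>{})) forces substitution into the requires-expression that names the constrained static variable template b, and GH116319's a<>::c<int>() exercises the same lookup through noexcept(noexcept(b<int>)). The CHECK lines pin observable artifacts only: bar<int>'s empty layout, the comdat for the instantiated member function, and the global a. For intuition, a hedged aside that is not part of the checked file — if appended inside namespace GH56652, these assertions follow from the test's own logic:

static_assert(C<bar<int>>);                      // bar exposes b<foo{}>
static_assert(!C<foo>);                          // foo has no member b
static_assert(__is_same(decltype(a), bar<int>)); // so fn returns T{}, not foo{}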
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index 19ab656..7cd3f14 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -1,13 +1,13 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
typedef unsigned int uint;
typedef unsigned long ulong;
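
This RUN-line rewrite is the mechanism used throughout the patch: every invocation keeps the shared CHECK prefix and adds a target-specific one, so assertions common to all targets stay on // CHECK: lines while divergent ones (sync scopes, address spaces) split into // GCN: and // AMDGCNSPIRV:. A generic sketch of the idiom, with placeholder label and function names:

// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s \
// RUN:   | FileCheck --check-prefixes=CHECK,GCN %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s \
// RUN:   | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
//
// CHECK-LABEL: @example_kernel
// GCN:         syncscope("agent")
// AMDGCNSPIRV: syncscope("device")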
@@ -50,7 +50,8 @@ void test_s_wait_event_export_ready() {
}
// CHECK-LABEL: @test_global_add_f32
-// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
#if !defined(__SPIRV__)
void test_global_add_f32(float *rtn, global float *addr, float x) {
#else
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 5f202ba..6bb20bf 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,9 +1,9 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
@@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1)
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
#if !defined(__SPIRV__)
@@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) {
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
@@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 039d032..ab0b0b9 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -1231,7 +1231,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu
// CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4
res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup");
- // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4
res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent");
// CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4
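
The prefix split in the three AMDGPU test files above encodes a single behavioral difference: for the spirv64-amd-amdhsa target, Clang emits SPIR-V memory-scope names in the atomicrmw syncscope, so "agent" becomes "device" and "wavefront" becomes "subgroup", while "workgroup" and "singlethread" are spelled identically for both targets. A minimal sketch of the source pattern being checked (illustrative, not copied from the tests); the same call lowers to the two syncscope spellings depending on the target triple:

    // amdgcn-unknown-unknown -> atomicrmw ... syncscope("agent") seq_cst
    // spirv64-amd-amdhsa     -> atomicrmw ... syncscope("device") seq_cst
    void inc_agent(__attribute__((address_space(1))) unsigned *gptr,
                   unsigned val) {
      __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent");
    }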
diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
index 7ffb7aae..8c7a778 100644
--- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
+++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp
@@ -1,9 +1,7 @@
-// RUN: %clang_cc1 -std=c++20 %s -verify=cxx20
-// RUN: %clang_cc1 -std=c++23 %s -verify=cxx23
-// RUN: %clang_cc1 -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
-// RUN: %clang_cc1 -std=c++23 -pedantic %s -verify=cxx23
-
-//cxx23-no-diagnostics
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++20 %s -verify=cxx20
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 %s -verify=cxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -Wpre-c++23-compat %s -verify=precxx23
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +sme -std=c++23 -pedantic %s -verify=cxx23
auto L1 = [] constexpr {};
// cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
@@ -14,3 +12,25 @@ auto L3 = [] static {};
// cxx20-warning@-1 {{lambda without a parameter clause is a C++23 extension}}
// cxx20-warning@-2 {{static lambdas are a C++23 extension}}
// precxx23-warning@-3 {{static lambdas are incompatible with C++ standards before C++23}}
+
+namespace GH161070 {
+void t1() { int a = [] __arm_streaming; }
+// precxx23-error@-1 {{'__arm_streaming' cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'__arm_streaming' cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{'__arm_streaming' in this position is a C++23 extension}}
+// precxx23-warning@-8 {{'__arm_streaming' in this position is incompatible with C++ standards before C++23}}
+
+void t2() { int a = [] [[assume(true)]]; }
+// precxx23-error@-1 {{'assume' attribute cannot be applied to a declaration}}
+// precxx23-error@-2 {{expected body of lambda expression}}
+// cxx23-error@-3 {{'assume' attribute cannot be applied to a declaration}}
+// cxx23-error@-4 {{expected body of lambda expression}}
+// cxx20-error@-5 {{'assume' attribute cannot be applied to a declaration}}
+// cxx20-error@-6 {{expected body of lambda expression}}
+// cxx20-warning@-7 {{an attribute specifier sequence in this position is a C++23 extension}}
+// precxx23-warning@-8 {{an attribute specifier sequence in this position is incompatible with C++ standards before C++23}}
+}
diff --git a/clang/test/SemaCXX/decltype.cpp b/clang/test/SemaCXX/decltype.cpp
index 739485b..45a4c4c 100644
--- a/clang/test/SemaCXX/decltype.cpp
+++ b/clang/test/SemaCXX/decltype.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wno-c99-designator %s
+// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify -Wno-c99-designator %s
// PR5290
int const f0();
@@ -156,6 +157,8 @@ struct A {
}
};
+
+
// This shouldn't crash.
static_assert(A<int>().f<int>() == 0, "");
// The result should not be dependent.
@@ -163,6 +166,81 @@ static_assert(A<int>().f<int>() != 0, ""); // expected-error {{static assertion
// expected-note@-1 {{expression evaluates to '0 != 0'}}
}
+
+#if __cplusplus >= 201703L
+namespace GH160497 {
+
+template <class> struct S {
+ template <class>
+ inline static auto mem =
+ [] { static_assert(false); // expected-error {{static assertion failed}} \
+ // expected-note {{while substituting into a lambda expression here}}
+ return 42;
+ }();
+};
+
+using T = decltype(S<void>::mem<void>);
+ // expected-note@-1 {{in instantiation of static data member 'GH160497::S<void>::mem<void>' requested here}}
+
+
+template <class> struct S2 {
+ template <class>
+ inline static auto* mem =
+ [] { static_assert(false); // expected-error {{static assertion failed}} \
+ // expected-note {{while substituting into a lambda expression here}}
+ return static_cast<int*>(nullptr);
+ }();
+};
+
+using T2 = decltype(S2<void>::mem<void>);
+//expected-note@-1 {{in instantiation of static data member 'GH160497::S2<void>::mem<void>' requested here}}
+
+template <class> struct S3 {
+ template <class>
+ inline static int mem = // Check we don't instantiate when the type is not deduced.
+ [] { static_assert(false);
+ return 42;
+ }();
+};
+
+using T = decltype(S3<void>::mem<void>);
+}
+
+namespace N1 {
+
+template<class>
+struct S {
+ template<class>
+ inline static auto mem = 42;
+};
+
+using T = decltype(S<void>::mem<void>);
+
+T y = 42;
+
+}
+
+namespace GH161196 {
+
+template <typename> struct A {
+ static constexpr int digits = 0;
+};
+
+template <typename> struct B {
+ template <int, typename MaskInt = int, int = A<MaskInt>::digits>
+ static constexpr auto XBitMask = 0;
+};
+
+struct C {
+ using ReferenceHost = B<int>;
+ template <int> static decltype(ReferenceHost::XBitMask<0>) XBitMask;
+};
+
+void test() { (void)C::XBitMask<0>; }
+
+}
+#endif
+
template<typename>
class conditional {
};
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0e2758d..e41f4eb 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -420,7 +420,7 @@ public:
std::vector<ModuleDeps *> NewMDs;
{
std::unique_lock<std::mutex> ul(Lock);
- for (const ModuleDeps &MD : Graph) {
+ for (ModuleDeps &MD : Graph) {
auto I = Modules.find({MD.ID, 0});
if (I != Modules.end()) {
I->first.InputIndex = std::min(I->first.InputIndex, InputIndex);
diff --git a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
index 982f3a4..526ba5c 100644
--- a/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
+++ b/compiler-rt/test/builtins/Unit/fixunstfdi_test.c
@@ -1,24 +1,25 @@
-// XFAIL: target=aarch64-{{.*}}-windows-{{.*}}
// RUN: %clang_builtins %s %librt -o %t && %run %t
// REQUIRES: librt_has_fixunstfdi
#include <stdio.h>
+#include "int_lib.h"
-#if _ARCH_PPC || __aarch64__ || __arm64ec__
+#if defined(CRT_HAS_TF_MODE)
-#include "int_lib.h"
+#define QUAD_PRECISION
+#include "fp_lib.h"
// Returns: convert a to a unsigned long long, rounding toward zero.
// Negative values all become zero.
-// Assumption: long double is a 128 bit floating point type
+// Assumption: fp_t is a 128 bit floating point type
// du_int is a 64 bit integral type
-// value in long double is representable in du_int or is negative
+// value in fp_t is representable in du_int or is negative
// (no range checking performed)
-COMPILER_RT_ABI du_int __fixunstfdi(long double a);
+COMPILER_RT_ABI du_int __fixunstfdi(fp_t a);
-int test__fixunstfdi(long double a, du_int expected)
+int test__fixunstfdi(fp_t a, du_int expected)
{
du_int x = __fixunstfdi(a);
if (x != expected)
@@ -29,13 +30,13 @@ int test__fixunstfdi(long double a, du_int expected)
char assumption_1[sizeof(du_int) == 2*sizeof(su_int)] = {0};
char assumption_2[sizeof(du_int)*CHAR_BIT == 64] = {0};
-char assumption_3[sizeof(long double)*CHAR_BIT == 128] = {0};
+char assumption_3[sizeof(fp_t)*CHAR_BIT == 128] = {0};
#endif
int main()
{
-#if _ARCH_PPC || __aarch64__ || __arm64ec__
+#if defined(CRT_HAS_TF_MODE)
if (test__fixunstfdi(0.0, 0))
return 1;
diff --git a/compiler-rt/test/builtins/Unit/multc3_test.c b/compiler-rt/test/builtins/Unit/multc3_test.c
index e9c99a7..18561cc 100644
--- a/compiler-rt/test/builtins/Unit/multc3_test.c
+++ b/compiler-rt/test/builtins/Unit/multc3_test.c
@@ -1,24 +1,26 @@
-// XFAIL: target=aarch64-{{.*}}-windows-{{.*}}
// RUN: %clang_builtins %s %librt -o %t && %run %t
// REQUIRES: librt_has_multc3
#include <stdio.h>
+#include "int_lib.h"
-#if _ARCH_PPC || __aarch64__ || __arm64ec__
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
-#include "int_lib.h"
#include <math.h>
#include <complex.h>
// Returns: the product of a + ib and c + id
-COMPILER_RT_ABI long double _Complex
-__multc3(long double __a, long double __b, long double __c, long double __d);
+COMPILER_RT_ABI Qcomplex
+__multc3(fp_t __a, fp_t __b, fp_t __c, fp_t __d);
enum {zero, non_zero, inf, NaN, non_zero_nan};
int
-classify(long double _Complex x)
+classify(Qcomplex x)
{
if (x == 0)
return zero;
@@ -41,13 +43,13 @@ classify(long double _Complex x)
return non_zero;
}
-int test__multc3(long double a, long double b, long double c, long double d)
+int test__multc3(fp_t a, fp_t b, fp_t c, fp_t d)
{
- long double _Complex r = __multc3(a, b, c, d);
+ Qcomplex r = __multc3(a, b, c, d);
// printf("test__multc3(%Lf, %Lf, %Lf, %Lf) = %Lf + I%Lf\n",
// a, b, c, d, creall(r), cimagl(r));
- long double _Complex dividend;
- long double _Complex divisor;
+ Qcomplex dividend;
+ Qcomplex divisor;
__real__ dividend = a;
__imag__ dividend = b;
@@ -188,7 +190,7 @@ int test__multc3(long double a, long double b, long double c, long double d)
return 0;
}
-long double x[][2] =
+fp_t x[][2] =
{
{ 1.e-6, 1.e-6},
{-1.e-6, 1.e-6},
@@ -348,7 +350,7 @@ long double x[][2] =
int main()
{
-#if _ARCH_PPC || __aarch64__ || __arm64ec__
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
const unsigned N = sizeof(x) / sizeof(x[0]);
unsigned i, j;
for (i = 0; i < N; ++i)
diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h
index 010f218..a86cbd8 100644
--- a/libc/src/string/memory_utils/op_generic.h
+++ b/libc/src/string/memory_utils/op_generic.h
@@ -41,12 +41,22 @@ static_assert((UINTPTR_MAX == 4294967295U) ||
"We currently only support 32- or 64-bit platforms");
#ifdef LIBC_COMPILER_IS_MSVC
-
+#ifdef LIBC_TARGET_ARCH_IS_X86
namespace LIBC_NAMESPACE_DECL {
using generic_v128 = __m128i;
using generic_v256 = __m256i;
using generic_v512 = __m512i;
} // namespace LIBC_NAMESPACE_DECL
+#else
+// Special handling when the target does not have real vector types.
+// We could potentially use uint8x16_t etc., but MSVC does not provide a
+// subscript operation on them.
+namespace LIBC_NAMESPACE_DECL {
+struct alignas(16) generic_v128 : public cpp::array<uint8_t, 16> {};
+struct alignas(32) generic_v256 : public cpp::array<uint8_t, 32> {};
+struct alignas(64) generic_v512 : public cpp::array<uint8_t, 64> {};
+} // namespace LIBC_NAMESPACE_DECL
+#endif
#else
namespace LIBC_NAMESPACE_DECL {
@@ -159,7 +169,8 @@ template <typename T> struct Memset {
LIBC_INLINE static void block(Ptr dst, uint8_t value) {
if constexpr (is_scalar_v<T> || is_vector_v<T>) {
- store<T>(dst, splat<T>(value));
+ // Avoid ambiguous call due to ADL
+ generic::store<T>(dst, splat<T>(value));
} else if constexpr (is_array_v<T>) {
using value_type = typename T::value_type;
const auto Splat = splat<value_type>(value);
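
For context on the new `generic::store<T>` qualification: an unqualified call also triggers argument-dependent lookup, so a second `store` overload found through a namespace associated with the argument types makes the call ambiguous. A standalone sketch of the pitfall, using hypothetical names rather than libc's real ones:

    namespace lib {
    struct Vec {};                      // argument type associated with 'lib'
    template <typename T> void store(char *, T) {}
    } // namespace lib

    namespace generic {
    template <typename T> void store(char *, T) {}

    void block(char *dst, lib::Vec v) {
      // store<lib::Vec>(dst, v);       // error: ambiguous, ADL also finds
      //                                // lib::store
      generic::store<lib::Vec>(dst, v); // qualified call is unambiguous
    }
    } // namespace generic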
diff --git a/libc/test/UnitTest/FEnvSafeTest.cpp b/libc/test/UnitTest/FEnvSafeTest.cpp
index 2730de3..4393f9d 100644
--- a/libc/test/UnitTest/FEnvSafeTest.cpp
+++ b/libc/test/UnitTest/FEnvSafeTest.cpp
@@ -43,7 +43,7 @@ void FEnvSafeTest::set_fenv(const fenv_t &fenv) {
void FEnvSafeTest::expect_fenv_eq(const fenv_t &before_fenv,
const fenv_t &after_fenv) {
-#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && !defined(LIBC_COMPILER_IS_MSVC)
using FPState = LIBC_NAMESPACE::fputil::FEnv::FPState;
const FPState &before_state = reinterpret_cast<const FPState &>(before_fenv);
const FPState &after_state = reinterpret_cast<const FPState &>(after_fenv);
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 6ea1ea0..566dde6 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -44,6 +44,9 @@ MinGW Improvements
MachO Improvements
------------------
+* ``--separate-cstring-literal-sections`` emits cstring literal sections into output sections defined by their section name.
+ (`#158720 <https://github.com/llvm/llvm-project/pull/158720>`_)
+
WebAssembly Improvements
------------------------
diff --git a/lld/test/wasm/archive-export.test b/lld/test/wasm/archive-export.test
index 9a76d60..c67e500 100644
--- a/lld/test/wasm/archive-export.test
+++ b/lld/test/wasm/archive-export.test
@@ -14,6 +14,9 @@ CHECK: Exports:
CHECK-NEXT: - Name: memory
CHECK-NEXT: Kind: MEMORY
CHECK-NEXT: Index: 0
+CHECK-NEXT: - Name: __stack_pointer
+CHECK-NEXT: Kind: GLOBAL
+CHECK-NEXT: Index: 0
CHECK-NEXT: - Name: foo
CHECK-NEXT: Kind: FUNCTION
CHECK-NEXT: Index: 1
diff --git a/lld/test/wasm/comdats.ll b/lld/test/wasm/comdats.ll
index 8fc301e..2dd687f 100644
--- a/lld/test/wasm/comdats.ll
+++ b/lld/test/wasm/comdats.ll
@@ -35,6 +35,9 @@ entry:
; CHECK-NEXT: - Name: memory
; CHECK-NEXT: Kind: MEMORY
; CHECK-NEXT: Index: 0
+; CHECK-NEXT: - Name: __stack_pointer
+; CHECK-NEXT: Kind: GLOBAL
+; CHECK-NEXT: Index: 0
; CHECK-NEXT: - Name: _start
; CHECK-NEXT: Kind: FUNCTION
; CHECK-NEXT: Index: 1
diff --git a/lld/test/wasm/visibility-hidden.ll b/lld/test/wasm/visibility-hidden.ll
index 36c29a8e..6ed7ba3 100644
--- a/lld/test/wasm/visibility-hidden.ll
+++ b/lld/test/wasm/visibility-hidden.ll
@@ -43,6 +43,9 @@ entry:
; CHECK-NEXT: - Name: memory
; CHECK-NEXT: Kind: MEMORY
; CHECK-NEXT: Index: 0
+; CHECK-NEXT: - Name: __stack_pointer
+; CHECK-NEXT: Kind: GLOBAL
+; CHECK-NEXT: Index: 0
; CHECK-NEXT: - Name: objectDefault
; CHECK-NEXT: Kind: FUNCTION
; CHECK-NEXT: Index: 1
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 9b85b6c..46c848d 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -914,9 +914,10 @@ static InputGlobal *createGlobal(StringRef name, bool isMutable) {
return make<InputGlobal>(wasmGlobal, nullptr);
}
-static GlobalSymbol *createGlobalVariable(StringRef name, bool isMutable) {
+static GlobalSymbol *createGlobalVariable(StringRef name, bool isMutable,
+ uint32_t flags = 0) {
InputGlobal *g = createGlobal(name, isMutable);
- return symtab->addSyntheticGlobal(name, WASM_SYMBOL_VISIBILITY_HIDDEN, g);
+ return symtab->addSyntheticGlobal(name, flags, g);
}
static GlobalSymbol *createOptionalGlobal(StringRef name, bool isMutable) {
@@ -966,9 +967,13 @@ static void createSyntheticSymbols() {
}
if (ctx.arg.sharedMemory) {
- ctx.sym.tlsBase = createGlobalVariable("__tls_base", true);
- ctx.sym.tlsSize = createGlobalVariable("__tls_size", false);
- ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false);
+ // TLS symbols are all hidden/dso-local
+ ctx.sym.tlsBase =
+ createGlobalVariable("__tls_base", true, WASM_SYMBOL_VISIBILITY_HIDDEN);
+ ctx.sym.tlsSize = createGlobalVariable("__tls_size", false,
+ WASM_SYMBOL_VISIBILITY_HIDDEN);
+ ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false,
+ WASM_SYMBOL_VISIBILITY_HIDDEN);
ctx.sym.initTLS = symtab->addSyntheticFunction(
"__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN,
make<SyntheticFunction>(is64 ? i64ArgSignature : i32ArgSignature,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index f1e73d7..82e9d86 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -3126,43 +3126,6 @@ void DWARFASTParserClang::ParseSingleMember(
if (!member_clang_type.IsCompleteType())
member_clang_type.GetCompleteType();
- {
- // Older versions of clang emit the same DWARF for array[0] and array[1]. If
- // the current field is at the end of the structure, then there is
- // definitely no room for extra elements and we override the type to
- // array[0]. This was fixed by f454dfb6b5af.
- CompilerType member_array_element_type;
- uint64_t member_array_size;
- bool member_array_is_incomplete;
-
- if (member_clang_type.IsArrayType(&member_array_element_type,
- &member_array_size,
- &member_array_is_incomplete) &&
- !member_array_is_incomplete) {
- uint64_t parent_byte_size =
- parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size, UINT64_MAX);
-
- // If the attrs.member_byte_offset is still set to UINT32_MAX this means
- // that the DW_TAG_member didn't have a DW_AT_data_member_location, so
- // don't emit an error if this is the case.
- if (attrs.member_byte_offset != UINT32_MAX &&
- attrs.member_byte_offset >= parent_byte_size) {
- if (member_array_size != 1 &&
- (member_array_size != 0 ||
- attrs.member_byte_offset > parent_byte_size)) {
- module_sp->ReportError(
- "{0:x8}: DW_TAG_member '{1}' refers to type {2:x16}"
- " which extends beyond the bounds of {3:x8}",
- die.GetID(), attrs.name,
- attrs.encoding_form.Reference().GetOffset(), parent_die.GetID());
- }
-
- member_clang_type =
- m_ast.CreateArrayType(member_array_element_type, 0, false);
- }
- }
- }
-
TypeSystemClang::RequireCompleteType(member_clang_type);
clang::FieldDecl *field_decl = TypeSystemClang::AddFieldToRecordType(
diff --git a/lldb/test/Shell/SymbolFile/DWARF/incomplete-member-beyond-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/incomplete-member-beyond-parent-bounds.yaml
new file mode 100644
index 0000000..4e659d0
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/incomplete-member-beyond-parent-bounds.yaml
@@ -0,0 +1,104 @@
+# This is DWARF where we placed an incomplete type at an offset equal to the
+# parent's DW_AT_byte_size. Check that we don't report an error in such cases.
+#
+# DW_TAG_compile_unit
+# DW_AT_name ("main.cpp")
+# DW_AT_language (DW_LANG_C)
+#
+# DW_TAG_structure_type
+# DW_AT_name ("Incomplete")
+# DW_AT_external (true)
+#
+# DW_TAG_structure_type
+# DW_AT_name ("Foo")
+# DW_AT_byte_size (0x04)
+#
+# DW_TAG_member
+# DW_AT_name ("mem")
+# DW_AT_data_member_location ("0x04")
+# DW_AT_type (0x00000011 "Incomplete")
+#
+# NULL
+#
+# NULL
+
+# RUN: yaml2obj %s > %t
+# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s
+
+# CHECK: Found 1 types:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+DWARF:
+ debug_str:
+ - main.cpp
+ - Incomplete
+ - Foo
+ - mem
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Code: 0x2
+ Tag: DW_TAG_structure_type
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_external
+ Form: DW_FORM_flag_present
+ - Code: 0x3
+ Tag: DW_TAG_structure_type
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Code: 0x4
+ Tag: DW_TAG_member
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_type
+ Form: DW_FORM_ref4
+ - Attribute: DW_AT_data_member_location
+ Form: DW_FORM_data1
+ debug_info:
+ - Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x0
+ - Value: 0x2
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x9
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x14
+ - Value: 0x04
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0x18
+ - Value: 0x11
+ - Value: 0x04
+ - AbbrCode: 0x0
+ - AbbrCode: 0x0
+...
diff --git a/lldb/test/Shell/SymbolFile/DWARF/member-beyond-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/member-beyond-parent-bounds.yaml
new file mode 100644
index 0000000..2ac538e
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/member-beyond-parent-bounds.yaml
@@ -0,0 +1,109 @@
+# This is malformed DWARF where we placed a non-zero-sized type at an offset
+# larger than the parent's DW_AT_byte_size. Check that we report an error in
+# such cases.
+#
+# DW_TAG_compile_unit
+# DW_AT_name ("main.cpp")
+# DW_AT_language (DW_LANG_C)
+#
+# DW_TAG_base_type
+# DW_AT_name ("int")
+# DW_AT_encoding (DW_ATE_signed)
+# DW_AT_byte_size (0x04)
+#
+# DW_TAG_structure_type
+# DW_AT_name ("Foo")
+# DW_AT_byte_size (0x04)
+#
+# DW_TAG_member
+# DW_AT_name ("mem")
+# DW_AT_data_member_location ("0x05")
+# DW_AT_type (0x00000011 "int")
+#
+# NULL
+#
+# NULL
+
+# RUN: yaml2obj %s > %t
+# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s
+
+# CHECK: Found 1 types:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+DWARF:
+ debug_str:
+ - main.cpp
+ - int
+ - Foo
+ - mem
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Code: 0x2
+ Tag: DW_TAG_base_type
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_encoding
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Code: 0x3
+ Tag: DW_TAG_structure_type
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Code: 0x4
+ Tag: DW_TAG_member
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_type
+ Form: DW_FORM_ref4
+ - Attribute: DW_AT_data_member_location
+ Form: DW_FORM_data1
+ debug_info:
+ - Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x0
+ - Value: 0x2
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x9
+ - Value: 0x5
+ - Value: 0x4
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x0d
+ - Value: 0x04
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0x11
+ - Value: 0x11
+ - Value: 0x05
+ - AbbrCode: 0x0
+ - AbbrCode: 0x0
+...
diff --git a/lldb/test/Shell/SymbolFile/DWARF/member-on-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/member-on-parent-bounds.yaml
new file mode 100644
index 0000000..736697c
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/member-on-parent-bounds.yaml
@@ -0,0 +1,109 @@
+# This is malformed DWARF where we placed a non-zero-sized type at an offset
+# equal to the parent's DW_AT_byte_size. Check that we report an error in such
+# cases.
+#
+# DW_TAG_compile_unit
+# DW_AT_name ("main.cpp")
+# DW_AT_language (DW_LANG_C)
+#
+# DW_TAG_base_type
+# DW_AT_name ("int")
+# DW_AT_encoding (DW_ATE_signed)
+# DW_AT_byte_size (0x04)
+#
+# DW_TAG_structure_type
+# DW_AT_name ("Foo")
+# DW_AT_byte_size (0x04)
+#
+# DW_TAG_member
+# DW_AT_name ("mem")
+# DW_AT_data_member_location ("0x04")
+# DW_AT_type (0x00000011 "int")
+#
+# NULL
+#
+# NULL
+
+# RUN: yaml2obj %s > %t
+# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s
+
+# CHECK: Found 1 types:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+DWARF:
+ debug_str:
+ - main.cpp
+ - int
+ - Foo
+ - mem
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Code: 0x2
+ Tag: DW_TAG_base_type
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_encoding
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Code: 0x3
+ Tag: DW_TAG_structure_type
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Code: 0x4
+ Tag: DW_TAG_member
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_type
+ Form: DW_FORM_ref4
+ - Attribute: DW_AT_data_member_location
+ Form: DW_FORM_data1
+ debug_info:
+ - Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x0
+ - Value: 0x2
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x9
+ - Value: 0x5
+ - Value: 0x4
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x0d
+ - Value: 0x04
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0x11
+ - Value: 0x11
+ - Value: 0x04
+ - AbbrCode: 0x0
+ - AbbrCode: 0x0
+...
diff --git a/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml b/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml
index fbdc626..1d1e129 100644
--- a/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml
+++ b/lldb/test/Shell/SymbolFile/DWARF/union-types-no-member-location.yaml
@@ -48,14 +48,10 @@
# RUN: yaml2obj %s > %t
# RUN: lldb-test symbols --name=UnionType --find=type %t > %t.stdout
# RUN: cat %t.stdout | FileCheck --check-prefix=STDOUT %s
-# RUN: lldb-test symbols --name=UnionType --find=type %t 2> %t.stderr
-# RUN: cat %t.stderr | FileCheck --allow-empty --check-prefix=STDERR %s
# STDOUT: Found 1 types:
# STDOUT: {{(0x)?[0-9a-fA-F]+}}: Type{0x0000002b} , name = "UnionType", size = 32, compiler_type = 0x{{[0-9a-fA-F]+}} union UnionType {
-# STDERR-NOT: error: union-types-no-member-location.yaml.tmp 0x00000031: DW_TAG_member 'array' refers to type 0x000000000000001f which extends beyond the bounds of 0x0000002b
-
--- !ELF
FileHeader:
Class: ELFCLASS64
diff --git a/lldb/test/Shell/SymbolFile/DWARF/zero-sized-member-in-parent-bounds.yaml b/lldb/test/Shell/SymbolFile/DWARF/zero-sized-member-in-parent-bounds.yaml
new file mode 100644
index 0000000..a98f62c
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/zero-sized-member-in-parent-bounds.yaml
@@ -0,0 +1,105 @@
+# This is DWARF where we placed a zero-sized type at an offset equal to the
+# parent's DW_AT_byte_size. Check that we don't report an error in such cases.
+#
+# DW_TAG_compile_unit
+# DW_AT_name ("main.cpp")
+# DW_AT_language (DW_LANG_C)
+#
+# DW_TAG_structure_type
+# DW_AT_name ("Bar")
+# DW_AT_byte_size (0x00)
+#
+# DW_TAG_structure_type
+# DW_AT_name ("Foo")
+# DW_AT_byte_size (0x04)
+#
+# DW_TAG_member
+# DW_AT_name ("mem")
+# DW_AT_data_member_location ("0x04")
+# DW_AT_type (0x00000011 "Bar")
+#
+# NULL
+#
+# NULL
+
+# RUN: yaml2obj %s > %t
+# RUN: lldb-test symbols --name=Foo --find=type %t 2>&1 | FileCheck %s
+
+# CHECK: Found 1 types:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+DWARF:
+ debug_str:
+ - main.cpp
+ - Bar
+ - Foo
+ - mem
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_udata
+ - Code: 0x2
+ Tag: DW_TAG_structure_type
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Code: 0x3
+ Tag: DW_TAG_structure_type
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ - Code: 0x4
+ Tag: DW_TAG_member
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_type
+ Form: DW_FORM_ref4
+ - Attribute: DW_AT_data_member_location
+ Form: DW_FORM_data1
+ debug_info:
+ - Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x0
+ - Value: 0x2
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x9
+ - Value: 0x0
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x0d
+ - Value: 0x04
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0x11
+ - Value: 0x11
+ - Value: 0x04
+ - AbbrCode: 0x0
+ - AbbrCode: 0x0
+...
diff --git a/llvm/docs/CommandGuide/llvm-readelf.rst b/llvm/docs/CommandGuide/llvm-readelf.rst
index 284c3aa4..5403fea 100644
--- a/llvm/docs/CommandGuide/llvm-readelf.rst
+++ b/llvm/docs/CommandGuide/llvm-readelf.rst
@@ -143,6 +143,10 @@ OPTIONS
Display all notes.
+.. option:: --offloading
+
+ Display the list of HIP offload bundles.
+
.. option:: --pretty-print
When used with :option:`--elf-output-style`, JSON output will be formatted in
diff --git a/llvm/docs/CommandGuide/llvm-readobj.rst b/llvm/docs/CommandGuide/llvm-readobj.rst
index 8bd29ea..0d05b94 100644
--- a/llvm/docs/CommandGuide/llvm-readobj.rst
+++ b/llvm/docs/CommandGuide/llvm-readobj.rst
@@ -104,6 +104,10 @@ file formats.
Do not demangle symbol names in the output. This option is only for ELF and
XCOFF file formats. The option is enabled by default.
+.. option:: --offloading
+
+ Display the list of HIP offload bundles.
+
.. option:: --relocations, --relocs, -r
Display the relocation entries in the file.
diff --git a/llvm/include/llvm/CAS/FileOffset.h b/llvm/include/llvm/CAS/FileOffset.h
new file mode 100644
index 0000000..21d045e
--- /dev/null
+++ b/llvm/include/llvm/CAS/FileOffset.h
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares the interface for FileOffset, which represents data
+/// stored at an offset from the beginning of a file.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_FILEOFFSET_H
+#define LLVM_CAS_FILEOFFSET_H
+
+#include <cstdlib>
+
+namespace llvm::cas {
+
+/// FileOffset is a wrapper around `uint64_t` to represent the offset of data
+/// from the beginning of the file.
+class FileOffset {
+public:
+ uint64_t get() const { return Offset; }
+
+ explicit operator bool() const { return Offset; }
+
+ FileOffset() = default;
+ explicit FileOffset(uint64_t Offset) : Offset(Offset) {}
+
+private:
+ uint64_t Offset = 0;
+};
+
+} // namespace llvm::cas
+
+#endif // LLVM_CAS_FILEOFFSET_H
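
A short usage sketch for the wrapper above (the offset value is arbitrary). A default-constructed FileOffset holds 0 and converts to false, so it can double as a "no offset" sentinel:

    llvm::cas::FileOffset None;     // None.get() == 0, bool(None) == false
    llvm::cas::FileOffset At(4096); // explicit, so no accidental conversions
    uint64_t Raw = At.get();        // 4096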
diff --git a/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h
new file mode 100644
index 0000000..5e41bf6
--- /dev/null
+++ b/llvm/include/llvm/CAS/OnDiskTrieRawHashMap.h
@@ -0,0 +1,236 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares the interface for OnDiskTrieRawHashMap, a thread-safe
+/// and (mostly) lock-free hash map stored as a trie and backed by persistent
+/// files on disk.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CAS_ONDISKTRIERAWHASHMAP_H
+#define LLVM_CAS_ONDISKTRIERAWHASHMAP_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/FileOffset.h"
+#include "llvm/Support/Error.h"
+#include <optional>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace cas {
+
+/// OnDiskTrieRawHashMap is a persistent trie data structure used as a hash map.
+/// The keys are fixed length, and are expected to be binary hashes with a
+/// normal distribution.
+///
+/// - Thread-safety is achieved through the use of atomics within a shared
+/// memory mapping. Atomic access does not work on networked filesystems.
+/// - Filesystem locks are used, but only sparingly:
+/// - during initialization, for creating / opening an existing store;
+/// - for the lifetime of the instance, a shared/reader lock is held
+/// - during destruction, if there are no concurrent readers, to shrink the
+/// files to their minimum size.
+/// - Path is used as a directory:
+/// - "index" stores the root trie and subtries.
+/// - "data" stores (most of) the entries, like a bump-ptr-allocator.
+/// - Large entries are stored externally in a file named by the key.
+/// - Code is system-dependent and the binary format itself is not portable.
+///   These are not artifacts that can/should be moved between different
+///   systems; they are only appropriate for local storage.
+class OnDiskTrieRawHashMap {
+public:
+ LLVM_DUMP_METHOD void dump() const;
+ void
+ print(raw_ostream &OS,
+ function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
+
+public:
+ /// Const value proxy to access the records stored in TrieRawHashMap.
+ struct ConstValueProxy {
+ ConstValueProxy() = default;
+ ConstValueProxy(ArrayRef<uint8_t> Hash, ArrayRef<char> Data)
+ : Hash(Hash), Data(Data) {}
+ ConstValueProxy(ArrayRef<uint8_t> Hash, StringRef Data)
+ : Hash(Hash), Data(Data.begin(), Data.size()) {}
+
+ ArrayRef<uint8_t> Hash;
+ ArrayRef<char> Data;
+ };
+
+ /// Value proxy to access the records stored in TrieRawHashMap.
+ struct ValueProxy {
+ operator ConstValueProxy() const { return ConstValueProxy(Hash, Data); }
+
+ ValueProxy() = default;
+ ValueProxy(ArrayRef<uint8_t> Hash, MutableArrayRef<char> Data)
+ : Hash(Hash), Data(Data) {}
+
+ ArrayRef<uint8_t> Hash;
+ MutableArrayRef<char> Data;
+ };
+
+ /// Validate the trie data structure.
+ ///
+ /// Callback receives the file offset to the data entry and the data stored.
+ Error validate(
+ function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const;
+
+  /// Check that a file offset is within the valid range for
+  /// OnDiskTrieRawHashMap.
+ static bool validOffset(FileOffset Offset) {
+ return Offset.get() < (1LL << 48);
+ }
+
+public:
+ /// Template class to implement a `pointer` type into the trie data structure.
+ ///
+  /// It provides pointer-like operations, e.g., dereference to get the
+  /// underlying data. It also reserves the top 16 bits of the pointer value,
+  /// which can be used to pack additional information if needed.
+ template <class ProxyT> class PointerImpl {
+ public:
+ FileOffset getOffset() const {
+ return FileOffset(OffsetLow32 | (uint64_t)OffsetHigh16 << 32);
+ }
+
+ explicit operator bool() const { return IsValue; }
+
+ const ProxyT &operator*() const {
+ assert(IsValue);
+ return Value;
+ }
+ const ProxyT *operator->() const {
+ assert(IsValue);
+ return &Value;
+ }
+
+ PointerImpl() = default;
+
+ protected:
+ PointerImpl(ProxyT Value, FileOffset Offset, bool IsValue = true)
+ : Value(Value), OffsetLow32((uint64_t)Offset.get()),
+ OffsetHigh16((uint64_t)Offset.get() >> 32), IsValue(IsValue) {
+ if (IsValue)
+ assert(validOffset(Offset));
+ }
+
+ ProxyT Value;
+ uint32_t OffsetLow32 = 0;
+ uint16_t OffsetHigh16 = 0;
+
+    // True if this points to a value (not a "nullptr"). Use an extra field
+    // because 0 can be a valid offset.
+ bool IsValue = false;
+ };
+
+ class pointer;
+ class const_pointer : public PointerImpl<ConstValueProxy> {
+ public:
+ const_pointer() = default;
+
+ private:
+ friend class pointer;
+ friend class OnDiskTrieRawHashMap;
+ using const_pointer::PointerImpl::PointerImpl;
+ };
+
+ class pointer : public PointerImpl<ValueProxy> {
+ public:
+ operator const_pointer() const {
+ return const_pointer(Value, getOffset(), IsValue);
+ }
+
+ pointer() = default;
+
+ private:
+ friend class OnDiskTrieRawHashMap;
+ using pointer::PointerImpl::PointerImpl;
+ };
+
+  /// Find the value from a hash.
+  ///
+  /// \returns a pointer to the value if it exists; otherwise returns a
+  /// non-value pointer that evaluates to `false` when converted to boolean.
+ const_pointer find(ArrayRef<uint8_t> Hash) const;
+
+  /// Helper function to recover a pointer into the trie from a file offset.
+ Expected<const_pointer> recoverFromFileOffset(FileOffset Offset) const;
+
+ using LazyInsertOnConstructCB =
+ function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue)>;
+ using LazyInsertOnLeakCB =
+ function_ref<void(FileOffset TentativeOffset, ValueProxy TentativeValue,
+ FileOffset FinalOffset, ValueProxy FinalValue)>;
+
+ /// Insert lazily.
+ ///
+ /// \p OnConstruct is called when ready to insert a value, after allocating
+ /// space for the data. It is called at most once.
+ ///
+ /// \p OnLeak is called only if \p OnConstruct has been called and a race
+ /// occurred before insertion, causing the tentative offset and data to be
+ /// abandoned. This allows clients to clean up other results or update any
+ /// references.
+ ///
+ /// NOTE: Does *not* guarantee that \p OnConstruct is only called on success.
+ /// The in-memory \a TrieRawHashMap uses LazyAtomicPointer to synchronize
+ /// simultaneous writes, but that seems dangerous to use in a memory-mapped
+ /// file in case a process crashes in the busy state.
+ Expected<pointer> insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct = nullptr,
+ LazyInsertOnLeakCB OnLeak = nullptr);
+
+ Expected<pointer> insert(const ConstValueProxy &Value) {
+ return insertLazy(Value.Hash, [&](FileOffset, ValueProxy Allocated) {
+ assert(Allocated.Hash == Value.Hash);
+ assert(Allocated.Data.size() == Value.Data.size());
+ llvm::copy(Value.Data, Allocated.Data.begin());
+ });
+ }
+
+ size_t size() const;
+ size_t capacity() const;
+
+ /// Gets or creates a file at \p Path with a hash-mapped trie named \p
+ /// TrieName. The hash size is \p NumHashBits (in bits) and the records store
+ /// data of size \p DataSize (in bytes).
+ ///
+ /// \p MaxFileSize controls the maximum file size to support, limiting the
+ /// size of the \a mapped_file_region. \p NewFileInitialSize is the starting
+ /// size if a new file is created.
+ ///
+ /// \p NewTableNumRootBits and \p NewTableNumSubtrieBits are hints to
+ /// configure the trie, if it doesn't already exist.
+ ///
+ /// \pre NumHashBits is a multiple of 8 (byte-aligned).
+ static Expected<OnDiskTrieRawHashMap>
+ create(const Twine &Path, const Twine &TrieName, size_t NumHashBits,
+ uint64_t DataSize, uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ std::optional<size_t> NewTableNumRootBits = std::nullopt,
+ std::optional<size_t> NewTableNumSubtrieBits = std::nullopt);
+
+ OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS);
+ OnDiskTrieRawHashMap &operator=(OnDiskTrieRawHashMap &&RHS);
+ ~OnDiskTrieRawHashMap();
+
+private:
+ struct ImplType;
+ explicit OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl);
+ std::unique_ptr<ImplType> Impl;
+};
+
+} // namespace cas
+} // namespace llvm
+
+#endif // LLVM_CAS_ONDISKTRIERAWHASHMAP_H
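
A hedged end-to-end sketch of the API declared above: create (or reopen) a map, insert a record, and look it up again. The path, trie name, and sizes are illustrative values, not defaults:

    #include "llvm/CAS/OnDiskTrieRawHashMap.h"
    using namespace llvm;
    using namespace llvm::cas;

    static Error demo(ArrayRef<uint8_t> Hash) { // e.g. 20 bytes = 160 bits
      auto Trie = OnDiskTrieRawHashMap::create(
          "/tmp/example.index", "example-trie", /*NumHashBits=*/160,
          /*DataSize=*/8, /*MaxFileSize=*/16 << 20,
          /*NewFileInitialSize=*/std::nullopt);
      if (!Trie)
        return Trie.takeError();

      char Record[8] = {0};
      // insert() copies Record into the mapped file; insertLazy() would let a
      // client construct the record in place via the OnConstruct callback.
      if (auto P = Trie->insert({Hash, ArrayRef<char>(Record)}); !P)
        return P.takeError();

      // find() returns a non-value pointer (boolean false) on a miss.
      if (OnDiskTrieRawHashMap::const_pointer P = Trie->find(Hash))
        (void)P->Data; // mapped view of the stored record
      return Error::success();
    }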
diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index de9675f..e97160e 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -185,6 +185,14 @@ inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) {
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I,
StringRef PassName);
+/// Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch
+/// weights in the new instruction if the parent function of the original
+/// instruction has an entry count. This avoids confusing users by injecting
+/// profile data into non-profiled functions.
+LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I,
+ Function &F,
+ StringRef PassName);
+
/// Analogous to setExplicitlyUnknownBranchWeights, but for functions and their
/// entry counts.
LLVM_ABI void setExplicitlyUnknownFunctionEntryCount(Function &F,
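
For the new ...IfProfiled helper above, a brief sketch of the intended call pattern (the pass name and surrounding code are hypothetical): a transform that synthesizes a branch and cannot compute meaningful weights attaches "unknown" weights only when the enclosing function actually carries profile data:

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/ProfDataUtils.h"
    using namespace llvm;

    static void annotate(BranchInst &NewBr, Function &F) {
      // Per the contract above, this is a no-op when F has no entry count.
      setExplicitlyUnknownBranchWeightsIfProfiled(NewBr, F, "example-pass");
    }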
diff --git a/llvm/include/llvm/Object/OffloadBundle.h b/llvm/include/llvm/Object/OffloadBundle.h
index f4d5a1d..18be62b 100644
--- a/llvm/include/llvm/Object/OffloadBundle.h
+++ b/llvm/include/llvm/Object/OffloadBundle.h
@@ -161,7 +161,7 @@ public:
OffsetStr.getAsInteger(10, O);
Str = Str.drop_front(OffsetStr.size());
- if (Str.consume_front("&size="))
+ if (!Str.consume_front("&size="))
return createStringError(object_error::parse_failed,
"Reading 'size' in URI");
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index fa313f5..d6c2d7f 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -64,6 +64,8 @@ protected:
/// A worklist of the instructions that need to be simplified.
InstructionWorklist &Worklist;
+ Function &F;
+
// Mode in which we are running the combiner.
const bool MinimizeSize;
@@ -98,17 +100,17 @@ protected:
bool ComputedBackEdges = false;
public:
- InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder,
- bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
- TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
- DominatorTree &DT, OptimizationRemarkEmitter &ORE,
- BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI,
- ProfileSummaryInfo *PSI, const DataLayout &DL,
+ InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder, Function &F,
+ AAResults *AA, AssumptionCache &AC, TargetLibraryInfo &TLI,
+ TargetTransformInfo &TTI, DominatorTree &DT,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI,
+ const DataLayout &DL,
ReversePostOrderTraversal<BasicBlock *> &RPOT)
: TTIForTargetIntrinsicsOnly(TTI), Builder(Builder), Worklist(Worklist),
- MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL),
- SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true,
- /*CanUseUndef*/ true, &DC),
+ F(F), MinimizeSize(F.hasMinSize()), AA(AA), AC(AC), TLI(TLI), DT(DT),
+ DL(DL), SQ(DL, &TLI, &DT, &AC, nullptr, /*UseInstrInfo*/ true,
+ /*CanUseUndef*/ true, &DC),
ORE(ORE), BFI(BFI), BPI(BPI), PSI(PSI), RPOT(RPOT) {}
virtual ~InstCombiner() = default;
diff --git a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
index 4d3ead29..f53cee2 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h
@@ -20,11 +20,15 @@ namespace llvm {
/// The gcov-style instrumentation pass
class GCOVProfilerPass : public PassInfoMixin<GCOVProfilerPass> {
public:
- GCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()) : GCOVOpts(Options) { }
+ GCOVProfilerPass(
+ const GCOVOptions &Options = GCOVOptions::getDefault(),
+ IntrusiveRefCntPtr<vfs::FileSystem> VFS = vfs::getRealFileSystem())
+ : GCOVOpts(Options), VFS(std::move(VFS)) {}
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
GCOVOptions GCOVOpts;
+ IntrusiveRefCntPtr<vfs::FileSystem> VFS;
};
} // namespace llvm
diff --git a/llvm/lib/CAS/CMakeLists.txt b/llvm/lib/CAS/CMakeLists.txt
index 6ed724b..7ae5f7e 100644
--- a/llvm/lib/CAS/CMakeLists.txt
+++ b/llvm/lib/CAS/CMakeLists.txt
@@ -2,14 +2,19 @@ add_llvm_component_library(LLVMCAS
ActionCache.cpp
ActionCaches.cpp
BuiltinCAS.cpp
+ DatabaseFile.cpp
InMemoryCAS.cpp
MappedFileRegionArena.cpp
ObjectStore.cpp
OnDiskCommon.cpp
+ OnDiskTrieRawHashMap.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/CAS
+ LINK_LIBS
+ ${LLVM_PTHREAD_LIB}
+
LINK_COMPONENTS
Support
)
diff --git a/llvm/lib/CAS/DatabaseFile.cpp b/llvm/lib/CAS/DatabaseFile.cpp
new file mode 100644
index 0000000..db8ce1d
--- /dev/null
+++ b/llvm/lib/CAS/DatabaseFile.cpp
@@ -0,0 +1,123 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file implements the common abstractions for the CAS database file.
+///
+//===----------------------------------------------------------------------===//
+
+#include "DatabaseFile.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+Error ondisk::createTableConfigError(std::errc ErrC, StringRef Path,
+ StringRef TableName, const Twine &Msg) {
+ return createStringError(make_error_code(ErrC),
+ Path + "[" + TableName + "]: " + Msg);
+}
+
+Error ondisk::checkTable(StringRef Label, size_t Expected, size_t Observed,
+ StringRef Path, StringRef TrieName) {
+ if (Expected == Observed)
+ return Error::success();
+ return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+ "mismatched " + Label +
+ " (expected: " + Twine(Expected) +
+ ", observed: " + Twine(Observed) + ")");
+}
+
+Expected<DatabaseFile>
+DatabaseFile::create(const Twine &Path, uint64_t Capacity,
+ function_ref<Error(DatabaseFile &)> NewDBConstructor) {
+  // Constructor to run if the file doesn't exist.
+ auto NewFileConstructor = [&](MappedFileRegionArena &Alloc) -> Error {
+ if (Alloc.capacity() <
+ sizeof(Header) + sizeof(MappedFileRegionArena::Header))
+ return createTableConfigError(std::errc::argument_out_of_domain,
+ Path.str(), "datafile",
+ "Allocator too small for header");
+ (void)new (Alloc.data()) Header{getMagic(), getVersion(), {0}};
+ DatabaseFile DB(Alloc);
+ return NewDBConstructor(DB);
+ };
+
+ // Get or create the file.
+ MappedFileRegionArena Alloc;
+ if (Error E = MappedFileRegionArena::create(Path, Capacity, sizeof(Header),
+ NewFileConstructor)
+ .moveInto(Alloc))
+ return std::move(E);
+
+ return DatabaseFile::get(
+ std::make_unique<MappedFileRegionArena>(std::move(Alloc)));
+}
+
+Error DatabaseFile::addTable(TableHandle Table) {
+ assert(Table);
+ assert(&Table.getRegion() == &getRegion());
+ int64_t ExistingRootOffset = 0;
+ const int64_t NewOffset =
+ reinterpret_cast<const char *>(&Table.getHeader()) - getRegion().data();
+ if (H->RootTableOffset.compare_exchange_strong(ExistingRootOffset, NewOffset))
+ return Error::success();
+
+ // Silently ignore attempts to set the root to itself.
+ if (ExistingRootOffset == NewOffset)
+ return Error::success();
+
+  // Return a proper error message.
+ TableHandle Root(getRegion(), ExistingRootOffset);
+ if (Root.getName() == Table.getName())
+ return createStringError(
+ make_error_code(std::errc::not_supported),
+ "collision with existing table of the same name '" + Table.getName() +
+ "'");
+
+ return createStringError(make_error_code(std::errc::not_supported),
+ "cannot add new table '" + Table.getName() +
+ "'"
+ " to existing root '" +
+ Root.getName() + "'");
+}
+
+std::optional<TableHandle> DatabaseFile::findTable(StringRef Name) {
+ int64_t RootTableOffset = H->RootTableOffset.load();
+ if (!RootTableOffset)
+ return std::nullopt;
+
+ TableHandle Root(getRegion(), RootTableOffset);
+ if (Root.getName() == Name)
+ return Root;
+
+ return std::nullopt;
+}
+
+Error DatabaseFile::validate(MappedFileRegion &Region) {
+ if (Region.size() < sizeof(Header))
+ return createStringError(std::errc::invalid_argument,
+ "database: missing header");
+
+ // Check the magic and version.
+ auto *H = reinterpret_cast<Header *>(Region.data());
+ if (H->Magic != getMagic())
+ return createStringError(std::errc::invalid_argument,
+ "database: bad magic");
+ if (H->Version != getVersion())
+ return createStringError(std::errc::invalid_argument,
+ "database: wrong version");
+
+ auto *MFH = reinterpret_cast<MappedFileRegionArena::Header *>(Region.data() +
+ sizeof(Header));
+ // Check the bump-ptr, which should point past the header.
+ if (MFH->BumpPtr.load() < (int64_t)sizeof(Header))
+ return createStringError(std::errc::invalid_argument,
+ "database: corrupt bump-ptr");
+
+ return Error::success();
+}
diff --git a/llvm/lib/CAS/DatabaseFile.h b/llvm/lib/CAS/DatabaseFile.h
new file mode 100644
index 0000000..609e5f13
--- /dev/null
+++ b/llvm/lib/CAS/DatabaseFile.h
@@ -0,0 +1,153 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file declares the common interface for a DatabaseFile that is used to
+/// implement OnDiskCAS.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CAS_DATABASEFILE_H
+#define LLVM_LIB_CAS_DATABASEFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm::cas::ondisk {
+
+using MappedFileRegion = MappedFileRegionArena::RegionT;
+
+/// Generic handle for a table.
+///
+/// Generic table header layout:
+/// - 2-bytes: TableKind
+/// - 2-bytes: TableNameSize
+/// - 4-bytes: TableNameRelOffset (relative to header)
+class TableHandle {
+public:
+ enum class TableKind : uint16_t {
+ TrieRawHashMap = 1,
+ DataAllocator = 2,
+ };
+ struct Header {
+ TableKind Kind;
+ uint16_t NameSize;
+ int32_t NameRelOffset; ///< Relative to Header.
+ };
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ template <class T> static void check() {
+ static_assert(
+ std::is_same<decltype(T::Header::GenericHeader), Header>::value,
+ "T::GenericHeader should be of type TableHandle::Header");
+ static_assert(offsetof(typename T::Header, GenericHeader) == 0,
+ "T::GenericHeader must be the head of T::Header");
+ }
+ template <class T> bool is() const { return T::Kind == H->Kind; }
+ template <class T> T dyn_cast() const {
+ check<T>();
+ if (is<T>())
+ return T(*Region, *reinterpret_cast<typename T::Header *>(H));
+ return T();
+ }
+ template <class T> T cast() const {
+ assert(is<T>());
+ return dyn_cast<T>();
+ }
+
+ StringRef getName() const {
+ auto *Begin = reinterpret_cast<const char *>(H) + H->NameRelOffset;
+ return StringRef(Begin, H->NameSize);
+ }
+
+ TableHandle() = default;
+ TableHandle(MappedFileRegion &Region, Header &H) : Region(&Region), H(&H) {}
+ TableHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : TableHandle(Region,
+ *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+/// Encapsulate a database file, which:
+/// - Sets/checks magic.
+/// - Sets/checks version.
+/// - Points at an arbitrary root table.
+/// - Sets up a MappedFileRegionArena for allocation.
+///
+/// Top-level layout:
+/// - 4-bytes: Magic
+/// - 4-bytes: Version
+/// - 8-bytes: RootTableOffset (16-bits: Kind; 48-bits: Offset)
+/// - 8-bytes: BumpPtr from MappedFileRegionArena
+class DatabaseFile {
+public:
+ static constexpr uint32_t getMagic() { return 0xDA7ABA53UL; }
+ static constexpr uint32_t getVersion() { return 1UL; }
+ struct Header {
+ uint32_t Magic;
+ uint32_t Version;
+ std::atomic<int64_t> RootTableOffset;
+ };
+
+ const Header &getHeader() { return *H; }
+ MappedFileRegionArena &getAlloc() { return Alloc; }
+ MappedFileRegion &getRegion() { return Alloc.getRegion(); }
+
+  /// Add a table. This is currently not thread-safe and should be called
+  /// inside NewDBConstructor.
+ Error addTable(TableHandle Table);
+
+  /// Find a table by name. Returns std::nullopt if not found.
+ std::optional<TableHandle> findTable(StringRef Name);
+
+ /// Create the DatabaseFile at Path with Capacity.
+ static Expected<DatabaseFile>
+ create(const Twine &Path, uint64_t Capacity,
+ function_ref<Error(DatabaseFile &)> NewDBConstructor);
+
+ size_t size() const { return Alloc.size(); }
+
+private:
+ static Expected<DatabaseFile>
+ get(std::unique_ptr<MappedFileRegionArena> Alloc) {
+ if (Error E = validate(Alloc->getRegion()))
+ return std::move(E);
+ return DatabaseFile(std::move(Alloc));
+ }
+
+ static Error validate(MappedFileRegion &Region);
+
+ DatabaseFile(MappedFileRegionArena &Alloc)
+ : H(reinterpret_cast<Header *>(Alloc.data())), Alloc(Alloc) {}
+ DatabaseFile(std::unique_ptr<MappedFileRegionArena> Alloc)
+ : DatabaseFile(*Alloc) {
+ OwnedAlloc = std::move(Alloc);
+ }
+
+ Header *H = nullptr;
+ MappedFileRegionArena &Alloc;
+ std::unique_ptr<MappedFileRegionArena> OwnedAlloc;
+};
+
+Error createTableConfigError(std::errc ErrC, StringRef Path,
+ StringRef TableName, const Twine &Msg);
+
+Error checkTable(StringRef Label, size_t Expected, size_t Observed,
+ StringRef Path, StringRef TrieName);
+
+} // namespace llvm::cas::ondisk
+
+#endif // LLVM_LIB_CAS_DATABASEFILE_H
diff --git a/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
new file mode 100644
index 0000000..9b382dd7
--- /dev/null
+++ b/llvm/lib/CAS/OnDiskTrieRawHashMap.cpp
@@ -0,0 +1,1178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file Implements OnDiskTrieRawHashMap.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskTrieRawHashMap.h"
+#include "DatabaseFile.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/TrieHashIndexGenerator.h"
+#include "llvm/CAS/MappedFileRegionArena.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::cas;
+using namespace llvm::cas::ondisk;
+
+#if LLVM_ENABLE_ONDISK_CAS
+
+//===----------------------------------------------------------------------===//
+// TrieRawHashMap data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class SubtrieHandle;
+class TrieRawHashMapHandle;
+class TrieVisitor;
+
+/// A value stored in the slots inside a SubTrie. A stored value can either be
+/// a subtrie (encoded after negation), i.e. the negated file offset to
+/// another subtrie, or it can be the file offset to a DataRecord.
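+///
+/// Illustrative encoding (offsets are hypothetical):
+/// \code
+///   SubtrieSlotValue::getDataOffset(0x40).getRawOffset();    // 0x40
+///   SubtrieSlotValue::getSubtrieOffset(0x40).getRawOffset(); // -0x40
+/// \endcode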
+class SubtrieSlotValue {
+public:
+ explicit operator bool() const { return !isEmpty(); }
+ bool isEmpty() const { return !Offset; }
+ bool isData() const { return Offset > 0; }
+ bool isSubtrie() const { return Offset < 0; }
+ uint64_t asData() const {
+ assert(isData());
+ return Offset;
+ }
+ uint64_t asSubtrie() const {
+ assert(isSubtrie());
+ return -Offset;
+ }
+
+ FileOffset asSubtrieFileOffset() const { return FileOffset(asSubtrie()); }
+
+ FileOffset asDataFileOffset() const { return FileOffset(asData()); }
+
+ int64_t getRawOffset() const { return Offset; }
+
+ static SubtrieSlotValue getDataOffset(int64_t Offset) {
+ return SubtrieSlotValue(Offset);
+ }
+
+ static SubtrieSlotValue getSubtrieOffset(int64_t Offset) {
+ return SubtrieSlotValue(-Offset);
+ }
+
+ static SubtrieSlotValue getDataOffset(FileOffset Offset) {
+ return getDataOffset(Offset.get());
+ }
+
+ static SubtrieSlotValue getSubtrieOffset(FileOffset Offset) {
+ return getSubtrieOffset(Offset.get());
+ }
+
+ static SubtrieSlotValue getFromSlot(std::atomic<int64_t> &Slot) {
+ return SubtrieSlotValue(Slot.load());
+ }
+
+ SubtrieSlotValue() = default;
+
+private:
+ friend class SubtrieHandle;
+ explicit SubtrieSlotValue(int64_t Offset) : Offset(Offset) {}
+ int64_t Offset = 0;
+};
+
+/// Subtrie layout:
+/// - 2-bytes: StartBit
+/// - 1-bytes: NumBits=lg(num-slots)
+/// - 5-bytes: 0-pad
+/// - <slots>
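+///
+/// For example, NumBits=6 yields 2^6 = 64 slots, so the subtrie occupies
+/// 8 (header) + 64 * 8 (slots) = 520 bytes (illustrative configuration).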
+class SubtrieHandle {
+public:
+ struct Header {
+ /// The bit this subtrie starts on.
+ uint16_t StartBit;
+
+ /// The number of bits this subtrie handles. It has 2^NumBits slots.
+ uint8_t NumBits;
+
+ /// 0-pad to 8B.
+ uint8_t ZeroPad1B;
+ uint32_t ZeroPad4B;
+ };
+
+ /// Slot storage:
+ /// - zero: Empty
+ /// - positive: RecordOffset
+ /// - negative: SubtrieOffset
+ using SlotT = std::atomic<int64_t>;
+
+ static int64_t getSlotsSize(uint32_t NumBits) {
+ return sizeof(int64_t) * (1u << NumBits);
+ }
+
+ static int64_t getSize(uint32_t NumBits) {
+ return sizeof(SubtrieHandle::Header) + getSlotsSize(NumBits);
+ }
+
+ int64_t getSize() const { return getSize(H->NumBits); }
+ size_t getNumSlots() const { return Slots.size(); }
+
+ SubtrieSlotValue load(size_t I) const {
+ return SubtrieSlotValue(Slots[I].load());
+ }
+ void store(size_t I, SubtrieSlotValue V) {
+ return Slots[I].store(V.getRawOffset());
+ }
+
+ void printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const;
+
+ /// Return true on success. On failure, \p Expected is updated with the
+ /// value currently stored in the slot.
+ bool compare_exchange_strong(size_t I, SubtrieSlotValue &Expected,
+ SubtrieSlotValue New) {
+ return Slots[I].compare_exchange_strong(Expected.Offset, New.Offset);
+ }
+
+ /// Sink \p V from \p I in this subtrie down to \p NewI in a new subtrie with
+ /// \p NumSubtrieBits.
+ ///
+ /// \p UnusedSubtrie maintains a 1-item "free" list of unused subtries. If a
+ /// new subtrie is created that isn't used because of a lost race, then it
+ /// should be returned as an out parameter to be passed back in the future.
+ /// If it's already valid, it should be used instead of allocating a new one.
+ ///
+ /// Returns the subtrie that now lives at \p I.
+ Expected<SubtrieHandle> sink(size_t I, SubtrieSlotValue V,
+ MappedFileRegionArena &Alloc,
+ size_t NumSubtrieBits,
+ SubtrieHandle &UnusedSubtrie, size_t NewI);
+
+ /// Only safe if the subtrie is empty.
+ void reinitialize(uint32_t StartBit, uint32_t NumBits);
+
+ SubtrieSlotValue getOffset() const {
+ return SubtrieSlotValue::getSubtrieOffset(
+ reinterpret_cast<const char *>(H) - Region->data());
+ }
+
+ FileOffset getFileOffset() const { return getOffset().asSubtrieFileOffset(); }
+
+ explicit operator bool() const { return H; }
+
+ Header &getHeader() const { return *H; }
+ uint32_t getStartBit() const { return H->StartBit; }
+ uint32_t getNumBits() const { return H->NumBits; }
+
+ static Expected<SubtrieHandle> create(MappedFileRegionArena &Alloc,
+ uint32_t StartBit, uint32_t NumBits);
+
+ static SubtrieHandle getFromFileOffset(MappedFileRegion &Region,
+ FileOffset Offset) {
+ return SubtrieHandle(Region, SubtrieSlotValue::getSubtrieOffset(Offset));
+ }
+
+ SubtrieHandle() = default;
+ SubtrieHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H), Slots(getSlots(H)) {}
+ SubtrieHandle(MappedFileRegion &Region, SubtrieSlotValue Offset)
+ : SubtrieHandle(Region, *reinterpret_cast<Header *>(
+ Region.data() + Offset.asSubtrie())) {}
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+ MutableArrayRef<SlotT> Slots;
+
+ static MutableArrayRef<SlotT> getSlots(Header &H) {
+ return MutableArrayRef(reinterpret_cast<SlotT *>(&H + 1), 1u << H.NumBits);
+ }
+};
+
+/// Handle for a TrieRawHashMap table.
+///
+/// TrieRawHashMap table layout:
+/// - [8-bytes: Generic table header]
+/// - 1-byte: NumSubtrieBits
+/// - 1-byte: Flags (not used yet)
+/// - 2-bytes: NumHashBits
+/// - 4-bytes: RecordDataSize (in bytes)
+/// - 8-bytes: RootTrieOffset
+/// - 8-bytes: AllocatorOffset (reserved for implementing free lists)
+/// - <name> '\0'
+///
+/// Record layout:
+/// - <data>
+/// - <hash>
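+///
+/// For example, a trie with NumHashBits=160 and RecordDataSize=8 stores 8
+/// data bytes followed by 20 hash bytes per record (illustrative
+/// configuration).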
+class TrieRawHashMapHandle {
+public:
+ static constexpr TableHandle::TableKind Kind =
+ TableHandle::TableKind::TrieRawHashMap;
+
+ struct Header {
+ TableHandle::Header GenericHeader;
+ uint8_t NumSubtrieBits;
+ uint8_t Flags; ///< None used yet.
+ uint16_t NumHashBits;
+ uint32_t RecordDataSize;
+ std::atomic<int64_t> RootTrieOffset;
+ std::atomic<int64_t> AllocatorOffset;
+ };
+
+ operator TableHandle() const {
+ if (!H)
+ return TableHandle();
+ return TableHandle(*Region, H->GenericHeader);
+ }
+
+ struct RecordData {
+ OnDiskTrieRawHashMap::ValueProxy Proxy;
+ SubtrieSlotValue Offset;
+ FileOffset getFileOffset() const { return Offset.asDataFileOffset(); }
+ };
+
+ enum Limits : size_t {
+ /// Seems like 65528 hash bits ought to be enough.
+ MaxNumHashBytes = UINT16_MAX >> 3,
+ MaxNumHashBits = MaxNumHashBytes << 3,
+
+ /// 2^16 bits in a trie is 65536 slots. This restricts us to a 16-bit
+ /// index. This many slots is suspiciously large anyway.
+ MaxNumRootBits = 16,
+
+ /// 2^10 bits in a trie is 1024 slots. This many slots seems suspiciously
+ /// large for subtries.
+ MaxNumSubtrieBits = 10,
+ };
+
+ static constexpr size_t getNumHashBytes(size_t NumHashBits) {
+ assert(NumHashBits % 8 == 0);
+ return NumHashBits / 8;
+ }
+ static constexpr size_t getRecordSize(size_t RecordDataSize,
+ size_t NumHashBits) {
+ return RecordDataSize + getNumHashBytes(NumHashBits);
+ }
+
+ RecordData getRecord(SubtrieSlotValue Offset);
+ Expected<RecordData> createRecord(MappedFileRegionArena &Alloc,
+ ArrayRef<uint8_t> Hash);
+
+ explicit operator bool() const { return H; }
+ const Header &getHeader() const { return *H; }
+ SubtrieHandle getRoot() const;
+ Expected<SubtrieHandle> getOrCreateRoot(MappedFileRegionArena &Alloc);
+ MappedFileRegion &getRegion() const { return *Region; }
+
+ size_t getFlags() const { return H->Flags; }
+ size_t getNumSubtrieBits() const { return H->NumSubtrieBits; }
+ size_t getNumHashBits() const { return H->NumHashBits; }
+ size_t getNumHashBytes() const { return getNumHashBytes(H->NumHashBits); }
+ size_t getRecordDataSize() const { return H->RecordDataSize; }
+ size_t getRecordSize() const {
+ return getRecordSize(H->RecordDataSize, H->NumHashBits);
+ }
+
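+ // With, e.g., a 10-bit root and 6-bit subtries (illustrative
+ // configuration), the generator consumes the first 10 bits of the hash for
+ // the root index and 6 bits per subtrie level thereafter.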
+ TrieHashIndexGenerator getIndexGen(SubtrieHandle Root,
+ ArrayRef<uint8_t> Hash) {
+ assert(Root.getStartBit() == 0);
+ assert(getNumHashBytes() == Hash.size());
+ assert(getNumHashBits() == Hash.size() * 8);
+ return TrieHashIndexGenerator{Root.getNumBits(), getNumSubtrieBits(), Hash};
+ }
+
+ static Expected<TrieRawHashMapHandle>
+ create(MappedFileRegionArena &Alloc, StringRef Name,
+ std::optional<uint64_t> NumRootBits, uint64_t NumSubtrieBits,
+ uint64_t NumHashBits, uint64_t RecordDataSize);
+
+ void
+ print(raw_ostream &OS,
+ function_ref<void(ArrayRef<char>)> PrintRecordData = nullptr) const;
+
+ Error validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const;
+ TrieRawHashMapHandle() = default;
+ TrieRawHashMapHandle(MappedFileRegion &Region, Header &H)
+ : Region(&Region), H(&H) {}
+ TrieRawHashMapHandle(MappedFileRegion &Region, intptr_t HeaderOffset)
+ : TrieRawHashMapHandle(
+ Region, *reinterpret_cast<Header *>(Region.data() + HeaderOffset)) {
+ }
+
+private:
+ MappedFileRegion *Region = nullptr;
+ Header *H = nullptr;
+};
+
+} // end anonymous namespace
+
+struct OnDiskTrieRawHashMap::ImplType {
+ DatabaseFile File;
+ TrieRawHashMapHandle Trie;
+};
+
+Expected<SubtrieHandle> SubtrieHandle::create(MappedFileRegionArena &Alloc,
+ uint32_t StartBit,
+ uint32_t NumBits) {
+ assert(StartBit <= TrieRawHashMapHandle::MaxNumHashBits);
+ assert(NumBits <= UINT8_MAX);
+ assert(NumBits <= TrieRawHashMapHandle::MaxNumRootBits);
+
+ auto Mem = Alloc.allocate(getSize(NumBits));
+ if (LLVM_UNLIKELY(!Mem))
+ return Mem.takeError();
+ auto *H =
+ new (*Mem) SubtrieHandle::Header{(uint16_t)StartBit, (uint8_t)NumBits,
+ /*ZeroPad1B=*/0, /*ZeroPad4B=*/0};
+ SubtrieHandle S(Alloc.getRegion(), *H);
+ for (auto I = S.Slots.begin(), E = S.Slots.end(); I != E; ++I)
+ new (I) SlotT(0);
+ return S;
+}
+
+SubtrieHandle TrieRawHashMapHandle::getRoot() const {
+ if (int64_t Root = H->RootTrieOffset)
+ return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Root));
+ return SubtrieHandle();
+}
+
+Expected<SubtrieHandle>
+TrieRawHashMapHandle::getOrCreateRoot(MappedFileRegionArena &Alloc) {
+ assert(&Alloc.getRegion() == &getRegion());
+ if (SubtrieHandle Root = getRoot())
+ return Root;
+
+ int64_t Race = 0;
+ auto LazyRoot = SubtrieHandle::create(Alloc, 0, H->NumSubtrieBits);
+ if (LLVM_UNLIKELY(!LazyRoot))
+ return LazyRoot.takeError();
+ if (H->RootTrieOffset.compare_exchange_strong(
+ Race, LazyRoot->getOffset().asSubtrie()))
+ return *LazyRoot;
+
+ // There was a race. Return the other root.
+ //
+ // TODO: Avoid leaking the lazy root by storing it in an allocator.
+ return SubtrieHandle(getRegion(), SubtrieSlotValue::getSubtrieOffset(Race));
+}
+
+Expected<TrieRawHashMapHandle>
+TrieRawHashMapHandle::create(MappedFileRegionArena &Alloc, StringRef Name,
+ std::optional<uint64_t> NumRootBits,
+ uint64_t NumSubtrieBits, uint64_t NumHashBits,
+ uint64_t RecordDataSize) {
+ // Allocate.
+ auto Offset = Alloc.allocateOffset(sizeof(Header) + Name.size() + 1);
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ // Construct the header and the name.
+ assert(Name.size() <= UINT16_MAX && "Expected smaller table name");
+ assert(NumSubtrieBits <= UINT8_MAX && "Expected valid subtrie bits");
+ assert(NumHashBits <= UINT16_MAX && "Expected valid hash size");
+ assert(RecordDataSize <= UINT32_MAX && "Expected smaller record data size");
+ auto *H = new (Alloc.getRegion().data() + *Offset)
+ Header{{TableHandle::TableKind::TrieRawHashMap, (uint16_t)Name.size(),
+ (uint32_t)sizeof(Header)},
+ (uint8_t)NumSubtrieBits,
+ /*Flags=*/0,
+ (uint16_t)NumHashBits,
+ (uint32_t)RecordDataSize,
+ /*RootTrieOffset=*/{0},
+ /*AllocatorOffset=*/{0}};
+ char *NameStorage = reinterpret_cast<char *>(H + 1);
+ llvm::copy(Name, NameStorage);
+ NameStorage[Name.size()] = 0;
+
+ // Construct a root trie, if requested. Guard the creation so the optional
+ // is only dereferenced when it holds a value.
+ TrieRawHashMapHandle Trie(Alloc.getRegion(), *H);
+ if (NumRootBits) {
+ auto Sub = SubtrieHandle::create(Alloc, 0, *NumRootBits);
+ if (LLVM_UNLIKELY(!Sub))
+ return Sub.takeError();
+ H->RootTrieOffset = Sub->getOffset().asSubtrie();
+ }
+ return Trie;
+}
+
+TrieRawHashMapHandle::RecordData
+TrieRawHashMapHandle::getRecord(SubtrieSlotValue Offset) {
+ char *Begin = Region->data() + Offset.asData();
+ OnDiskTrieRawHashMap::ValueProxy Proxy;
+ Proxy.Data = MutableArrayRef(Begin, getRecordDataSize());
+ Proxy.Hash = ArrayRef(reinterpret_cast<const uint8_t *>(Proxy.Data.end()),
+ getNumHashBytes());
+ return RecordData{Proxy, Offset};
+}
+
+Expected<TrieRawHashMapHandle::RecordData>
+TrieRawHashMapHandle::createRecord(MappedFileRegionArena &Alloc,
+ ArrayRef<uint8_t> Hash) {
+ assert(&Alloc.getRegion() == Region);
+ assert(Hash.size() == getNumHashBytes());
+ auto Offset = Alloc.allocateOffset(getRecordSize());
+ if (LLVM_UNLIKELY(!Offset))
+ return Offset.takeError();
+
+ RecordData Record = getRecord(SubtrieSlotValue::getDataOffset(*Offset));
+ llvm::copy(Hash, const_cast<uint8_t *>(Record.Proxy.Hash.begin()));
+ return Record;
+}
+
+Expected<OnDiskTrieRawHashMap::const_pointer>
+OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
+ // Check alignment.
+ if (!isAligned(MappedFileRegionArena::getAlign(), Offset.get()))
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "unaligned file offset at 0x" +
+ utohexstr(Offset.get(), /*LowerCase=*/true));
+
+ // Check bounds.
+ //
+ // Note: There's no potential overflow when using \c uint64_t because Offset
+ // is in valid offset range and the record size is in \c [0,UINT32_MAX].
+ if (!validOffset(Offset) ||
+ Offset.get() + Impl->Trie.getRecordSize() > Impl->File.getAlloc().size())
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "file offset too large: 0x" +
+ utohexstr(Offset.get(), /*LowerCase=*/true));
+
+ // Looks okay...
+ TrieRawHashMapHandle::RecordData D =
+ Impl->Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+ return const_pointer(D.Proxy, D.getFileOffset());
+}
+
+OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
+ TrieRawHashMapHandle Trie = Impl->Trie;
+ assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+ SubtrieHandle S = Trie.getRoot();
+ if (!S)
+ return const_pointer();
+
+ TrieHashIndexGenerator IndexGen = Trie.getIndexGen(S, Hash);
+ size_t Index = IndexGen.next();
+ for (;;) {
+ // Try to set the content.
+ SubtrieSlotValue V = S.load(Index);
+ if (!V)
+ return const_pointer();
+
+ // Check for an exact match.
+ if (V.isData()) {
+ TrieRawHashMapHandle::RecordData D = Trie.getRecord(V);
+ return D.Proxy.Hash == Hash ? const_pointer(D.Proxy, D.getFileOffset())
+ : const_pointer();
+ }
+
+ Index = IndexGen.next();
+ S = SubtrieHandle(Trie.getRegion(), V);
+ }
+}
+
+/// Only safe if the subtrie is empty.
+void SubtrieHandle::reinitialize(uint32_t StartBit, uint32_t NumBits) {
+ assert(StartBit > H->StartBit);
+ assert(NumBits <= H->NumBits);
+ // Ideally would also assert that all slots are empty, but that's expensive.
+
+ H->StartBit = StartBit;
+ H->NumBits = NumBits;
+}
+
+Expected<OnDiskTrieRawHashMap::pointer>
+OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct,
+ LazyInsertOnLeakCB OnLeak) {
+ TrieRawHashMapHandle Trie = Impl->Trie;
+ assert(Hash.size() == Trie.getNumHashBytes() && "Invalid hash");
+
+ MappedFileRegionArena &Alloc = Impl->File.getAlloc();
+ std::optional<SubtrieHandle> S;
+ auto Err = Trie.getOrCreateRoot(Alloc).moveInto(S);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+
+ TrieHashIndexGenerator IndexGen = Trie.getIndexGen(*S, Hash);
+ size_t Index = IndexGen.next();
+
+ // Walk through the hash bytes and insert into correct trie position.
+ std::optional<TrieRawHashMapHandle::RecordData> NewRecord;
+ SubtrieHandle UnusedSubtrie;
+ for (;;) {
+ SubtrieSlotValue Existing = S->load(Index);
+
+ // Try to set it, if it's empty.
+ if (!Existing) {
+ if (!NewRecord) {
+ auto Err = Trie.createRecord(Alloc, Hash).moveInto(NewRecord);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ if (OnConstruct)
+ OnConstruct(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy);
+ }
+
+ if (S->compare_exchange_strong(Index, Existing, NewRecord->Offset))
+ return pointer(NewRecord->Proxy, NewRecord->Offset.asDataFileOffset());
+
+ // Race means that Existing is no longer empty; fall through...
+ }
+
+ if (Existing.isSubtrie()) {
+ S = SubtrieHandle(Trie.getRegion(), Existing);
+ Index = IndexGen.next();
+ continue;
+ }
+
+ // Check for an exact match.
+ TrieRawHashMapHandle::RecordData ExistingRecord = Trie.getRecord(Existing);
+ if (ExistingRecord.Proxy.Hash == Hash) {
+ if (NewRecord && OnLeak)
+ OnLeak(NewRecord->Offset.asDataFileOffset(), NewRecord->Proxy,
+ ExistingRecord.Offset.asDataFileOffset(), ExistingRecord.Proxy);
+ return pointer(ExistingRecord.Proxy,
+ ExistingRecord.Offset.asDataFileOffset());
+ }
+
+ // Sink the existing content as long as the indexes match.
+ for (;;) {
+ size_t NextIndex = IndexGen.next();
+ size_t NewIndexForExistingContent =
+ IndexGen.getCollidingBits(ExistingRecord.Proxy.Hash);
+
+ auto Err = S->sink(Index, Existing, Alloc, IndexGen.getNumBits(),
+ UnusedSubtrie, NewIndexForExistingContent)
+ .moveInto(S);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ Index = NextIndex;
+
+ // Found the difference.
+ if (NextIndex != NewIndexForExistingContent)
+ break;
+ }
+ }
+}
+
+Expected<SubtrieHandle> SubtrieHandle::sink(size_t I, SubtrieSlotValue V,
+ MappedFileRegionArena &Alloc,
+ size_t NumSubtrieBits,
+ SubtrieHandle &UnusedSubtrie,
+ size_t NewI) {
+ std::optional<SubtrieHandle> NewS;
+ if (UnusedSubtrie) {
+ // Steal UnusedSubtrie and initialize it.
+ NewS.emplace();
+ std::swap(*NewS, UnusedSubtrie);
+ NewS->reinitialize(getStartBit() + getNumBits(), NumSubtrieBits);
+ } else {
+ // Allocate a new, empty subtrie.
+ auto Err = SubtrieHandle::create(Alloc, getStartBit() + getNumBits(),
+ NumSubtrieBits)
+ .moveInto(NewS);
+ if (LLVM_UNLIKELY(Err))
+ return std::move(Err);
+ }
+
+ NewS->store(NewI, V);
+ if (compare_exchange_strong(I, V, NewS->getOffset()))
+ return *NewS; // Success!
+
+ // Raced.
+ assert(V.isSubtrie() && "Expected racing sink() to add a subtrie");
+
+ // Wipe out the new slot so NewS can be reused and set the out parameter.
+ NewS->store(NewI, SubtrieSlotValue());
+ UnusedSubtrie = *NewS;
+
+ // Return the subtrie added by the concurrent sink() call.
+ return SubtrieHandle(Alloc.getRegion(), V);
+}
+
+void OnDiskTrieRawHashMap::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+ Impl->Trie.print(OS, PrintRecordData);
+}
+
+Error OnDiskTrieRawHashMap::validate(
+ function_ref<Error(FileOffset, ConstValueProxy)> RecordVerifier) const {
+ return Impl->Trie.validate(RecordVerifier);
+}
+
+// Helper function that prints hex digits, supporting a sub-byte (but
+// nibble-aligned) starting position.
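+// For example, Bytes={0xAB, 0xCD} with StartBit=4 and NumBits=8 prints "bc"
+// (illustrative values).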
+static void printHexDigits(raw_ostream &OS, ArrayRef<uint8_t> Bytes,
+ size_t StartBit, size_t NumBits) {
+ assert(StartBit % 4 == 0);
+ assert(NumBits % 4 == 0);
+ for (size_t I = StartBit, E = StartBit + NumBits; I != E; I += 4) {
+ uint8_t HexPair = Bytes[I / 8];
+ uint8_t HexDigit = I % 8 == 0 ? HexPair >> 4 : HexPair & 0xf;
+ OS << hexdigit(HexDigit, /*LowerCase=*/true);
+ }
+}
+
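+// Prints the bits [StartBit, StartBit+NumBits) most-significant-bit first;
+// for example, Bytes={0xAB} with StartBit=0 and NumBits=4 prints "1010"
+// (illustrative values).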
+static void printBits(raw_ostream &OS, ArrayRef<uint8_t> Bytes, size_t StartBit,
+ size_t NumBits) {
+ assert(StartBit + NumBits <= Bytes.size() * 8u);
+ for (size_t I = StartBit, E = StartBit + NumBits; I != E; ++I) {
+ uint8_t Byte = Bytes[I / 8];
+ size_t ByteOffset = I % 8;
+ if (size_t ByteShift = 8 - ByteOffset - 1)
+ Byte >>= ByteShift;
+ OS << (Byte & 0x1 ? '1' : '0');
+ }
+}
+
+void SubtrieHandle::printHash(raw_ostream &OS, ArrayRef<uint8_t> Bytes) const {
+ // afb[1c:00*01110*0]def
+ size_t EndBit = getStartBit() + getNumBits();
+ size_t HashEndBit = Bytes.size() * 8u;
+
+ size_t FirstBinaryBit = getStartBit() & ~0x3u;
+ printHexDigits(OS, Bytes, 0, FirstBinaryBit);
+
+ size_t LastBinaryBit = (EndBit + 3u) & ~0x3u;
+ OS << "[";
+ printBits(OS, Bytes, FirstBinaryBit, LastBinaryBit - FirstBinaryBit);
+ OS << "]";
+
+ printHexDigits(OS, Bytes, LastBinaryBit, HashEndBit - LastBinaryBit);
+}
+
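+// Appends the bit pattern of a slot index, most-significant bit first; for
+// example, Index=5 with NumSlots=16 appends "0101" (illustrative values).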
+static void appendIndexBits(std::string &Prefix, size_t Index,
+ size_t NumSlots) {
+ std::string Bits;
+ for (size_t NumBits = 1u; NumBits < NumSlots; NumBits <<= 1) {
+ Bits.push_back('0' + (Index & 0x1));
+ Index >>= 1;
+ }
+ for (char Ch : llvm::reverse(Bits))
+ Prefix += Ch;
+}
+
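+// Prints a binary prefix as hex digits where possible; for example,
+// Prefix="01111010" prints "7a" and Prefix="011110" prints "7[10]"
+// (illustrative values).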
+static void printPrefix(raw_ostream &OS, StringRef Prefix) {
+ while (Prefix.size() >= 4) {
+ uint8_t Digit;
+ bool ErrorParsingBinary = Prefix.take_front(4).getAsInteger(2, Digit);
+ assert(!ErrorParsingBinary);
+ (void)ErrorParsingBinary;
+ OS << hexdigit(Digit, /*LowerCase=*/true);
+ Prefix = Prefix.drop_front(4);
+ }
+ if (!Prefix.empty())
+ OS << "[" << Prefix << "]";
+}
+
+LLVM_DUMP_METHOD void OnDiskTrieRawHashMap::dump() const { print(dbgs()); }
+
+static Expected<size_t> checkParameter(StringRef Label, size_t Max,
+ std::optional<size_t> Value,
+ std::optional<size_t> Default,
+ StringRef Path, StringRef TableName) {
+ assert(Value || Default);
+ assert(!Default || *Default <= Max);
+ if (!Value)
+ return *Default;
+
+ if (*Value <= Max)
+ return *Value;
+ return createTableConfigError(
+ std::errc::argument_out_of_domain, Path, TableName,
+ "invalid " + Label + ": " + Twine(*Value) + " (max: " + Twine(Max) + ")");
+}
+
+size_t OnDiskTrieRawHashMap::size() const { return Impl->File.size(); }
+size_t OnDiskTrieRawHashMap::capacity() const {
+ return Impl->File.getRegion().size();
+}
+
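+// Illustrative usage sketch (path, name, and sizes are hypothetical):
+//   auto Trie = OnDiskTrieRawHashMap::create(
+//       "/tmp/cas/index", "index", /*NumHashBits=*/160, /*DataSize=*/8,
+//       /*MaxFileSize=*/1 << 30, /*NewFileInitialSize=*/std::nullopt,
+//       /*NewTableNumRootBits=*/std::nullopt,
+//       /*NewTableNumSubtrieBits=*/std::nullopt);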
+Expected<OnDiskTrieRawHashMap>
+OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
+ size_t NumHashBits, uint64_t DataSize,
+ uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ std::optional<size_t> NewTableNumRootBits,
+ std::optional<size_t> NewTableNumSubtrieBits) {
+ SmallString<128> PathStorage;
+ StringRef Path = PathTwine.toStringRef(PathStorage);
+ SmallString<128> TrieNameStorage;
+ StringRef TrieName = TrieNameTwine.toStringRef(TrieNameStorage);
+
+ constexpr size_t DefaultNumRootBits = 10;
+ constexpr size_t DefaultNumSubtrieBits = 6;
+
+ size_t NumRootBits;
+ if (Error E = checkParameter(
+ "root bits", TrieRawHashMapHandle::MaxNumRootBits,
+ NewTableNumRootBits, DefaultNumRootBits, Path, TrieName)
+ .moveInto(NumRootBits))
+ return std::move(E);
+
+ size_t NumSubtrieBits;
+ if (Error E = checkParameter("subtrie bits",
+ TrieRawHashMapHandle::MaxNumSubtrieBits,
+ NewTableNumSubtrieBits, DefaultNumSubtrieBits,
+ Path, TrieName)
+ .moveInto(NumSubtrieBits))
+ return std::move(E);
+
+ size_t NumHashBytes = NumHashBits >> 3;
+ if (Error E =
+ checkParameter("hash size", TrieRawHashMapHandle::MaxNumHashBits,
+ NumHashBits, std::nullopt, Path, TrieName)
+ .takeError())
+ return std::move(E);
+ assert(NumHashBits == NumHashBytes << 3 &&
+ "Expected hash size to be byte-aligned");
+ if (NumHashBits != NumHashBytes << 3)
+ return createTableConfigError(
+ std::errc::argument_out_of_domain, Path, TrieName,
+ "invalid hash size: " + Twine(NumHashBits) + " (not byte-aligned)");
+
+ // Constructor callback, used only when the file doesn't already exist.
+ auto NewDBConstructor = [&](DatabaseFile &DB) -> Error {
+ auto Trie =
+ TrieRawHashMapHandle::create(DB.getAlloc(), TrieName, NumRootBits,
+ NumSubtrieBits, NumHashBits, DataSize);
+ if (LLVM_UNLIKELY(!Trie))
+ return Trie.takeError();
+
+ return DB.addTable(*Trie);
+ };
+
+ // Get or create the file.
+ Expected<DatabaseFile> File =
+ DatabaseFile::create(Path, MaxFileSize, NewDBConstructor);
+ if (!File)
+ return File.takeError();
+
+ // Find the trie and validate it.
+ std::optional<TableHandle> Table = File->findTable(TrieName);
+ if (!Table)
+ return createTableConfigError(std::errc::argument_out_of_domain, Path,
+ TrieName, "table not found");
+ if (Error E = checkTable("table kind", (size_t)TrieRawHashMapHandle::Kind,
+ (size_t)Table->getHeader().Kind, Path, TrieName))
+ return std::move(E);
+ auto Trie = Table->cast<TrieRawHashMapHandle>();
+ assert(Trie && "Already checked the kind");
+
+ // Check the hash and data size.
+ if (Error E = checkTable("hash size", NumHashBits, Trie.getNumHashBits(),
+ Path, TrieName))
+ return std::move(E);
+ if (Error E = checkTable("data size", DataSize, Trie.getRecordDataSize(),
+ Path, TrieName))
+ return std::move(E);
+
+ // No flags supported right now. Either corrupt, or coming from a future
+ // writer.
+ if (size_t Flags = Trie.getFlags())
+ return createTableConfigError(std::errc::invalid_argument, Path, TrieName,
+ "unsupported flags: " + Twine(Flags));
+
+ // Success.
+ OnDiskTrieRawHashMap::ImplType Impl{DatabaseFile(std::move(*File)), Trie};
+ return OnDiskTrieRawHashMap(std::make_unique<ImplType>(std::move(Impl)));
+}
+
+static Error createInvalidTrieError(uint64_t Offset, const Twine &Msg) {
+ return createStringError(make_error_code(std::errc::protocol_error),
+ "invalid trie at 0x" +
+ utohexstr(Offset, /*LowerCase=*/true) + ": " +
+ Msg);
+}
+
+//===----------------------------------------------------------------------===//
+// TrieVisitor data structures.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// A multi-threaded visitor to traverse the Trie.
+///
+/// TODO: add more sanity checks that catch more than plain data corruption.
+/// For example, ill-formed data can be constructed to form a cycle of
+/// sub-tries, which leads to an infinite loop when visiting (or inserting
+/// data).
+class TrieVisitor {
+public:
+ TrieVisitor(TrieRawHashMapHandle Trie, unsigned ThreadCount = 0,
+ unsigned ErrorLimit = 50)
+ : Trie(Trie), ErrorLimit(ErrorLimit),
+ Threads(hardware_concurrency(ThreadCount)) {}
+ virtual ~TrieVisitor() = default;
+ Error visit();
+
+private:
+ // Virtual method to implement the action when visiting a sub-trie.
+ virtual Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) {
+ return Error::success();
+ }
+
+ // Virtual method to implement the action when visiting a slot in a trie node.
+ virtual Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) {
+ return Error::success();
+ }
+
+protected:
+ TrieRawHashMapHandle Trie;
+
+private:
+ Error traverseTrieNode(SubtrieHandle Node, StringRef Prefix);
+
+ Error validateSubTrie(SubtrieHandle Node, bool IsRoot);
+
+ // Helper function to capture errors when visiting the trie nodes.
+ void addError(Error NewError) {
+ assert(NewError && "not an error");
+ std::lock_guard<std::mutex> ErrorLock(Lock);
+ if (NumError >= ErrorLimit) {
+ // Too many errors.
+ consumeError(std::move(NewError));
+ return;
+ }
+
+ if (Err)
+ Err = joinErrors(std::move(*Err), std::move(NewError));
+ else
+ Err = std::move(NewError);
+ NumError++;
+ }
+
+ bool tooManyErrors() {
+ std::lock_guard<std::mutex> ErrorLock(Lock);
+ return (bool)Err && NumError >= ErrorLimit;
+ }
+
+ const unsigned ErrorLimit;
+ std::optional<Error> Err;
+ unsigned NumError = 0;
+ std::mutex Lock;
+ DefaultThreadPool Threads;
+};
+
+/// A visitor that traverses and prints the Trie.
+class TriePrinter : public TrieVisitor {
+public:
+ TriePrinter(TrieRawHashMapHandle Trie, raw_ostream &OS,
+ function_ref<void(ArrayRef<char>)> PrintRecordData)
+ : TrieVisitor(Trie, /*ThreadCount=*/1), OS(OS),
+ PrintRecordData(PrintRecordData) {}
+
+ Error printRecords() {
+ if (Records.empty())
+ return Error::success();
+
+ OS << "records\n";
+ llvm::sort(Records);
+ for (int64_t Offset : Records) {
+ TrieRawHashMapHandle::RecordData Record =
+ Trie.getRecord(SubtrieSlotValue::getDataOffset(Offset));
+ if (auto Err = printRecord(Record))
+ return Err;
+ }
+ return Error::success();
+ }
+
+ Error printRecord(TrieRawHashMapHandle::RecordData &Record) {
+ OS << "- addr=" << (void *)Record.getFileOffset().get() << " ";
+ if (PrintRecordData) {
+ PrintRecordData(Record.Proxy.Data);
+ } else {
+ OS << "bytes=";
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Record.Proxy.Data.data()),
+ Record.Proxy.Data.size());
+ printHexDigits(OS, Data, 0, Data.size() * 8);
+ }
+ OS << "\n";
+ return Error::success();
+ }
+
+ Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) override {
+ if (Prefix.empty()) {
+ OS << "root";
+ } else {
+ OS << "subtrie=";
+ printPrefix(OS, Prefix);
+ }
+
+ OS << " addr="
+ << (void *)(reinterpret_cast<const char *>(&SubTrie.getHeader()) -
+ Trie.getRegion().data());
+ OS << " num-slots=" << SubTrie.getNumSlots() << "\n";
+ return Error::success();
+ }
+
+ Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) override {
+ OS << "- index=";
+ for (size_t Pad : {10, 100, 1000})
+ if (I < Pad && Subtrie.getNumSlots() >= Pad)
+ OS << "0";
+ OS << I << " ";
+ if (Slot.isSubtrie()) {
+ OS << "addr=" << (void *)Slot.asSubtrie();
+ OS << " subtrie=";
+ printPrefix(OS, Prefix);
+ OS << "\n";
+ return Error::success();
+ }
+ TrieRawHashMapHandle::RecordData Record = Trie.getRecord(Slot);
+ OS << "addr=" << (void *)Record.getFileOffset().get();
+ OS << " content=";
+ Subtrie.printHash(OS, Record.Proxy.Hash);
+ OS << "\n";
+ Records.push_back(Slot.asData());
+ return Error::success();
+ }
+
+private:
+ raw_ostream &OS;
+ function_ref<void(ArrayRef<char>)> PrintRecordData;
+ SmallVector<int64_t> Records;
+};
+
+/// TrieVerifier that adds additional verification on top of the basic visitor.
+class TrieVerifier : public TrieVisitor {
+public:
+ TrieVerifier(
+ TrieRawHashMapHandle Trie,
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier)
+ : TrieVisitor(Trie), RecordVerifier(RecordVerifier) {}
+
+private:
+ Error visitSubTrie(StringRef Prefix, SubtrieHandle SubTrie) final {
+ return Error::success();
+ }
+
+ Error visitSlot(unsigned I, SubtrieHandle Subtrie, StringRef Prefix,
+ SubtrieSlotValue Slot) final {
+ if (RecordVerifier && Slot.isData()) {
+ if (!isAligned(MappedFileRegionArena::getAlign(), Slot.asData()))
+ return createInvalidTrieError(Slot.asData(), "mis-aligned data entry");
+
+ TrieRawHashMapHandle::RecordData Record =
+ Trie.getRecord(SubtrieSlotValue::getDataOffset(Slot.asData()));
+ return RecordVerifier(Slot.asDataFileOffset(),
+ OnDiskTrieRawHashMap::ConstValueProxy{
+ Record.Proxy.Hash, Record.Proxy.Data});
+ }
+ return Error::success();
+ }
+
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier;
+};
+} // namespace
+
+Error TrieVisitor::visit() {
+ auto Root = Trie.getRoot();
+ if (!Root)
+ return Error::success();
+
+ if (auto Err = validateSubTrie(Root, /*IsRoot=*/true))
+ return Err;
+
+ if (auto Err = visitSubTrie("", Root))
+ return Err;
+
+ SmallVector<SubtrieHandle> Subs;
+ SmallVector<std::string> Prefixes;
+ const size_t NumSlots = Root.getNumSlots();
+ for (size_t I = 0, E = NumSlots; I != E; ++I) {
+ SubtrieSlotValue Slot = Root.load(I);
+ if (!Slot)
+ continue;
+ uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
+ if (Offset >= (uint64_t)Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "slot points out of bound");
+ std::string SubtriePrefix;
+ appendIndexBits(SubtriePrefix, I, NumSlots);
+ if (Slot.isSubtrie()) {
+ SubtrieHandle S(Trie.getRegion(), Slot);
+ Subs.push_back(S);
+ Prefixes.push_back(SubtriePrefix);
+ }
+ if (auto Err = visitSlot(I, Root, SubtriePrefix, Slot))
+ return Err;
+ }
+
+ for (size_t I = 0, E = Subs.size(); I != E; ++I) {
+ Threads.async(
+ [&](unsigned Idx) {
+ // Don't run if there are already too many errors.
+ if (tooManyErrors())
+ return;
+ if (auto Err = traverseTrieNode(Subs[Idx], Prefixes[Idx]))
+ addError(std::move(Err));
+ },
+ I);
+ }
+
+ Threads.wait();
+ if (Err)
+ return std::move(*Err);
+ return Error::success();
+}
+
+Error TrieVisitor::validateSubTrie(SubtrieHandle Node, bool IsRoot) {
+ char *Addr = reinterpret_cast<char *>(&Node.getHeader());
+ const int64_t Offset = Node.getFileOffset().get();
+ if (Addr + Node.getSize() >=
+ Trie.getRegion().data() + Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "subtrie node spans out of bound");
+
+ if (!IsRoot &&
+ Node.getStartBit() + Node.getNumBits() > Trie.getNumHashBits()) {
+ return createInvalidTrieError(Offset,
+ "subtrie represents too many hash bits");
+ }
+
+ if (IsRoot) {
+ if (Node.getStartBit() != 0)
+ return createInvalidTrieError(Offset,
+ "root node doesn't start at 0 index");
+
+ return Error::success();
+ }
+
+ if (Node.getNumBits() > Trie.getNumSubtrieBits())
+ return createInvalidTrieError(Offset, "subtrie has wrong number of slots");
+
+ return Error::success();
+}
+
+Error TrieVisitor::traverseTrieNode(SubtrieHandle Node, StringRef Prefix) {
+ if (auto Err = validateSubTrie(Node, /*IsRoot=*/false))
+ return Err;
+
+ if (auto Err = visitSubTrie(Prefix, Node))
+ return Err;
+
+ SmallVector<SubtrieHandle> Subs;
+ SmallVector<std::string> Prefixes;
+ const size_t NumSlots = Node.getNumSlots();
+ for (size_t I = 0, E = NumSlots; I != E; ++I) {
+ SubtrieSlotValue Slot = Node.load(I);
+ if (!Slot)
+ continue;
+ uint64_t Offset = Slot.isSubtrie() ? Slot.asSubtrie() : Slot.asData();
+ if (Offset >= (uint64_t)Trie.getRegion().size())
+ return createInvalidTrieError(Offset, "slot points out of bound");
+ std::string SubtriePrefix = Prefix.str();
+ appendIndexBits(SubtriePrefix, I, NumSlots);
+ if (Slot.isSubtrie()) {
+ SubtrieHandle S(Trie.getRegion(), Slot);
+ Subs.push_back(S);
+ Prefixes.push_back(SubtriePrefix);
+ }
+ if (auto Err = visitSlot(I, Node, SubtriePrefix, Slot))
+ return Err;
+ }
+ for (size_t I = 0, E = Subs.size(); I != E; ++I)
+ if (auto Err = traverseTrieNode(Subs[I], Prefixes[I]))
+ return Err;
+
+ return Error::success();
+}
+
+void TrieRawHashMapHandle::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+ OS << "hash-num-bits=" << getNumHashBits()
+ << " hash-size=" << getNumHashBytes()
+ << " record-data-size=" << getRecordDataSize() << "\n";
+
+ TriePrinter Printer(*this, OS, PrintRecordData);
+ if (auto Err = Printer.visit())
+ OS << "error: " << toString(std::move(Err)) << "\n";
+
+ if (auto Err = Printer.printRecords())
+ OS << "error: " << toString(std::move(Err)) << "\n";
+}
+
+Error TrieRawHashMapHandle::validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const {
+ // Use the base TrieVisitor to identify structural errors inside the trie
+ // first.
+ TrieVisitor BasicVerifier(*this);
+ if (auto Err = BasicVerifier.visit())
+ return Err;
+
+ // If the trie data structure is sound, do a second pass to verify the data;
+ // the verifier function can assume the index is correct. However, newly
+ // added bad entries can still produce errors.
+ TrieVerifier Verifier(*this, RecordVerifier);
+ return Verifier.visit();
+}
+
+#else // !LLVM_ENABLE_ONDISK_CAS
+
+struct OnDiskTrieRawHashMap::ImplType {};
+
+Expected<OnDiskTrieRawHashMap>
+OnDiskTrieRawHashMap::create(const Twine &PathTwine, const Twine &TrieNameTwine,
+ size_t NumHashBits, uint64_t DataSize,
+ uint64_t MaxFileSize,
+ std::optional<uint64_t> NewFileInitialSize,
+ std::optional<size_t> NewTableNumRootBits,
+ std::optional<size_t> NewTableNumSubtrieBits) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+Expected<OnDiskTrieRawHashMap::pointer>
+OnDiskTrieRawHashMap::insertLazy(ArrayRef<uint8_t> Hash,
+ LazyInsertOnConstructCB OnConstruct,
+ LazyInsertOnLeakCB OnLeak) {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+Expected<OnDiskTrieRawHashMap::const_pointer>
+OnDiskTrieRawHashMap::recoverFromFileOffset(FileOffset Offset) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+OnDiskTrieRawHashMap::const_pointer
+OnDiskTrieRawHashMap::find(ArrayRef<uint8_t> Hash) const {
+ return const_pointer();
+}
+
+void OnDiskTrieRawHashMap::print(
+ raw_ostream &OS, function_ref<void(ArrayRef<char>)> PrintRecordData) const {
+}
+
+Error OnDiskTrieRawHashMap::validate(
+ function_ref<Error(FileOffset, OnDiskTrieRawHashMap::ConstValueProxy)>
+ RecordVerifier) const {
+ return createStringError(make_error_code(std::errc::not_supported),
+ "OnDiskTrieRawHashMap is not supported");
+}
+
+size_t OnDiskTrieRawHashMap::size() const { return 0; }
+size_t OnDiskTrieRawHashMap::capacity() const { return 0; }
+
+#endif // LLVM_ENABLE_ONDISK_CAS
+
+OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(std::unique_ptr<ImplType> Impl)
+ : Impl(std::move(Impl)) {}
+OnDiskTrieRawHashMap::OnDiskTrieRawHashMap(OnDiskTrieRawHashMap &&RHS) =
+ default;
+OnDiskTrieRawHashMap &
+OnDiskTrieRawHashMap::operator=(OnDiskTrieRawHashMap &&RHS) = default;
+OnDiskTrieRawHashMap::~OnDiskTrieRawHashMap() = default;
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index 5827292..99029c1 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -252,6 +252,13 @@ void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName) {
MDB.createString(PassName)}));
}
+void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, Function &F,
+ StringRef PassName) {
+ if (std::optional<Function::ProfileCount> EC = F.getEntryCount();
+ EC && EC->getCount() > 0)
+ setExplicitlyUnknownBranchWeights(I, PassName);
+}
+
void setExplicitlyUnknownFunctionEntryCount(Function &F, StringRef PassName) {
MDBuilder MDB(F.getContext());
F.setMetadata(
diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp
index 1e1042c..0dd378e 100644
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ b/llvm/lib/Object/OffloadBundle.cpp
@@ -89,17 +89,17 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
uint64_t EntryIDSize;
StringRef EntryID;
- if (auto EC = Reader.readInteger(EntryOffset))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readInteger(EntryOffset))
+ return Err;
- if (auto EC = Reader.readInteger(EntrySize))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readInteger(EntrySize))
+ return Err;
- if (auto EC = Reader.readInteger(EntryIDSize))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readInteger(EntryIDSize))
+ return Err;
- if (auto EC = Reader.readFixedString(EntryID, EntryIDSize))
- return errorCodeToError(object_error::parse_failed);
+ if (Error Err = Reader.readFixedString(EntryID, EntryIDSize))
+ return Err;
auto Entry = std::make_unique<OffloadBundleEntry>(
EntryOffset + SectionOffset, EntrySize, EntryIDSize, EntryID);
@@ -125,7 +125,7 @@ OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
// Read the Bundle Entries
Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset);
if (Err)
- return errorCodeToError(object_error::parse_failed);
+ return Err;
return std::unique_ptr<OffloadBundleFatBin>(TheBundle);
}
diff --git a/llvm/lib/Support/Mustache.cpp b/llvm/lib/Support/Mustache.cpp
index 686688a..8da6fdb 100644
--- a/llvm/lib/Support/Mustache.cpp
+++ b/llvm/lib/Support/Mustache.cpp
@@ -282,18 +282,15 @@ void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
// For example:
// The template string
// " \t{{#section}}A{{/section}}"
-// would be considered as having no text ahead and would be render as
+// would be considered as having no text ahead and would be rendered as:
// "A"
-// The exception for this is partial tag which requires us to
-// keep track of the indentation once it's rendered.
void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
Token &CurrentToken, Token::Type CurrentType) {
Token &PrevToken = Tokens[Idx - 1];
StringRef PrevTokenBody = PrevToken.TokenBody;
StringRef Unindented = PrevTokenBody.rtrim(" \r\t\v");
size_t Indentation = PrevTokenBody.size() - Unindented.size();
- if (CurrentType != Token::Type::Partial)
- PrevToken.TokenBody = Unindented.str();
+ PrevToken.TokenBody = Unindented.str();
CurrentToken.setIndentation(Indentation);
}
@@ -439,7 +436,8 @@ class AddIndentationStringStream : public raw_ostream {
public:
explicit AddIndentationStringStream(llvm::raw_ostream &WrappedStream,
size_t Indentation)
- : Indentation(Indentation), WrappedStream(WrappedStream) {
+ : Indentation(Indentation), WrappedStream(WrappedStream),
+ NeedsIndent(true) {
SetUnbuffered();
}
@@ -448,10 +446,15 @@ protected:
llvm::StringRef Data(Ptr, Size);
SmallString<0> Indent;
Indent.resize(Indentation, ' ');
+
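+ // Emit the pending indent lazily, only before the first character of a
+ // non-empty line, so blank lines stay unindented.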
for (char C : Data) {
+ if (NeedsIndent && C != '\n') {
+ WrappedStream << Indent;
+ NeedsIndent = false;
+ }
WrappedStream << C;
if (C == '\n')
- WrappedStream << Indent;
+ NeedsIndent = true;
}
}
@@ -460,6 +463,7 @@ protected:
private:
size_t Indentation;
llvm::raw_ostream &WrappedStream;
+ bool NeedsIndent;
};
class Parser {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index cb57c43..d4d9e54 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -193,7 +193,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
// we need to invert the branch condition to jump over TrueBB when the
// condition is false.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
// Insert branch instruction.
BuildMI(MBB, MBBI, DL, TII->get(RISCVCC::getBrCond(CC)))
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 56db09a..6d418fd 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1134,7 +1134,7 @@ unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
}
}
-RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
+RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unrecognized conditional branch");
@@ -1554,7 +1554,7 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
return Register();
};
- unsigned NewOpc = RISCVCC::getBrCond(getOppositeBranchCondition(CC));
+ unsigned NewOpc = RISCVCC::getBrCond(getInverseBranchCondition(CC));
// Might be case 1.
// Don't change 0 to 1 since we can use x0.
@@ -1801,7 +1801,7 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
// Add condition code, inverting if necessary.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
if (Invert)
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
NewMI.addImm(CC);
// Copy the false register.
@@ -3978,7 +3978,7 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
case RISCV::PseudoCCMOVGPR: {
// CCMOV can be commuted by inverting the condition.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(CC);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 2bc499b..42a0c4c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -44,7 +44,7 @@ enum CondCode {
COND_INVALID
};
-CondCode getOppositeBranchCondition(CondCode);
+CondCode getInverseBranchCondition(CondCode);
unsigned getBrCond(CondCode CC, unsigned SelectOpc = 0);
} // end of namespace RISCVCC
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 292eab7..cd04ff5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45169,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
return true;
+ case X86ISD::INSERTPS:
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
@@ -45239,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
case X86ISD::BLENDV:
return false;
// SSE target shuffles.
+ case X86ISD::INSERTPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::UNPCKL:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d1ca0a6..59e103cd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -880,11 +880,11 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// zext(bool) + C -> bool ? C + 1 : C
if (match(Op0, m_ZExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, InstCombiner::AddOne(Op1C), Op1);
+ return createSelectInst(X, InstCombiner::AddOne(Op1C), Op1);
// sext(bool) + C -> bool ? C - 1 : C
if (match(Op0, m_SExt(m_Value(X))) &&
X->getType()->getScalarSizeInBits() == 1)
- return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1);
+ return createSelectInst(X, InstCombiner::SubOne(Op1C), Op1);
// ~X + C --> (C-1) - X
if (match(Op0, m_Not(m_Value(X)))) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 7a979c1..4f94aa2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -23,6 +23,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -62,14 +63,14 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
public InstVisitor<InstCombinerImpl, Instruction *> {
public:
InstCombinerImpl(InstructionWorklist &Worklist, BuilderTy &Builder,
- bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
+ Function &F, AAResults *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI,
ProfileSummaryInfo *PSI, const DataLayout &DL,
ReversePostOrderTraversal<BasicBlock *> &RPOT)
- : InstCombiner(Worklist, Builder, MinimizeSize, AA, AC, TLI, TTI, DT, ORE,
- BFI, BPI, PSI, DL, RPOT) {}
+ : InstCombiner(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI, BPI,
+ PSI, DL, RPOT) {}
virtual ~InstCombinerImpl() = default;
@@ -469,6 +470,17 @@ private:
Value *simplifyNonNullOperand(Value *V, bool HasDereferenceable,
unsigned Depth = 0);
+ SelectInst *createSelectInst(Value *C, Value *S1, Value *S2,
+ const Twine &NameStr = "",
+ InsertPosition InsertBefore = nullptr,
+ Instruction *MDFrom = nullptr) {
+ SelectInst *SI =
+ SelectInst::Create(C, S1, S2, NameStr, InsertBefore, MDFrom);
+ if (!MDFrom)
+ setExplicitlyUnknownBranchWeightsIfProfiled(*SI, F, DEBUG_TYPE);
+ return SI;
+ }
+
public:
/// Create and insert the idiom we use to indicate a block is unreachable
/// without having to rewrite the CFG from within InstCombine.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 550f095..d457e0c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1253,7 +1253,7 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
// shl (zext i1 X), C1 --> select (X, 1 << C1, 0)
if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
auto *NewC = Builder.CreateShl(ConstantInt::get(Ty, 1), C1);
- return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
+ return createSelectInst(X, NewC, ConstantInt::getNullValue(Ty));
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f0ddd5c..8fbaf68 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1735,7 +1735,7 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
Constant *Zero = ConstantInt::getNullValue(BO.getType());
Value *TVal = Builder.CreateBinOp(BO.getOpcode(), Ones, C);
Value *FVal = Builder.CreateBinOp(BO.getOpcode(), Zero, C);
- return SelectInst::Create(X, TVal, FVal);
+ return createSelectInst(X, TVal, FVal);
}
static Value *simplifyOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
@@ -5934,8 +5934,8 @@ static bool combineInstructionsOverFunction(
LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
<< F.getName() << "\n");
- InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
- ORE, BFI, BPI, PSI, DL, RPOT);
+ InstCombinerImpl IC(Worklist, Builder, F, AA, AC, TLI, TTI, DT, ORE, BFI,
+ BPI, PSI, DL, RPOT);
IC.MaxArraySizeForCombine = MaxArraySize;
bool MadeChangeInThisIteration = IC.prepareWorklist(F);
MadeChangeInThisIteration |= IC.run();
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index e5bf2d1..d842275 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -35,6 +35,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/CFGMST.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
@@ -92,8 +93,10 @@ class GCOVFunction;
class GCOVProfiler {
public:
- GCOVProfiler() : GCOVProfiler(GCOVOptions::getDefault()) {}
- GCOVProfiler(const GCOVOptions &Opts) : Options(Opts) {}
+ GCOVProfiler()
+ : GCOVProfiler(GCOVOptions::getDefault(), *vfs::getRealFileSystem()) {}
+ GCOVProfiler(const GCOVOptions &Opts, vfs::FileSystem &VFS)
+ : Options(Opts), VFS(VFS) {}
bool
runOnModule(Module &M, function_ref<BlockFrequencyInfo *(Function &F)> GetBFI,
function_ref<BranchProbabilityInfo *(Function &F)> GetBPI,
@@ -110,6 +113,7 @@ public:
os->write_zeros(4 - s.size() % 4);
}
void writeBytes(const char *Bytes, int Size) { os->write(Bytes, Size); }
+ vfs::FileSystem &getVirtualFileSystem() const { return VFS; }
private:
// Create the .gcno files for the Module based on DebugInfo.
@@ -166,6 +170,7 @@ private:
std::vector<Regex> ExcludeRe;
DenseSet<const BasicBlock *> ExecBlocks;
StringMap<bool> InstrumentedFiles;
+ vfs::FileSystem &VFS;
};
struct BBInfo {
@@ -214,10 +219,10 @@ static StringRef getFunctionName(const DISubprogram *SP) {
/// Prefer relative paths in the coverage notes. Clang also may split
/// up absolute paths into a directory and filename component. When
/// the relative path doesn't exist, reconstruct the absolute path.
-static SmallString<128> getFilename(const DIScope *SP) {
+static SmallString<128> getFilename(const DIScope *SP, vfs::FileSystem &VFS) {
SmallString<128> Path;
StringRef RelPath = SP->getFilename();
- if (sys::fs::exists(RelPath))
+ if (VFS.exists(RelPath))
Path = RelPath;
else
sys::path::append(Path, SP->getDirectory(), SP->getFilename());
@@ -357,7 +362,7 @@ namespace {
void writeOut(uint32_t CfgChecksum) {
write(GCOV_TAG_FUNCTION);
- SmallString<128> Filename = getFilename(SP);
+ SmallString<128> Filename = getFilename(SP, P->getVirtualFileSystem());
uint32_t BlockLen = 3 + wordsOfString(getFunctionName(SP));
BlockLen += 1 + wordsOfString(Filename) + 4;
@@ -455,7 +460,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
if (FilterRe.empty() && ExcludeRe.empty()) {
return true;
}
- SmallString<128> Filename = getFilename(F.getSubprogram());
+ SmallString<128> Filename = getFilename(F.getSubprogram(), VFS);
auto It = InstrumentedFiles.find(Filename);
if (It != InstrumentedFiles.end()) {
return It->second;
@@ -467,7 +472,7 @@ bool GCOVProfiler::isFunctionInstrumented(const Function &F) {
// Path can be
// /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/bits/*.h so for
// such a case we must get the real_path.
- if (sys::fs::real_path(Filename, RealPath)) {
+ if (VFS.getRealPath(Filename, RealPath)) {
// real_path can fail with path like "foo.c".
RealFilename = Filename;
} else {
@@ -524,9 +529,10 @@ std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
SmallString<128> Filename = CU->getFilename();
sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
StringRef FName = sys::path::filename(Filename);
- SmallString<128> CurPath;
- if (sys::fs::current_path(CurPath))
+ ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
+ if (!CWD)
return std::string(FName);
+ SmallString<128> CurPath{*CWD};
sys::path::append(CurPath, FName);
return std::string(CurPath);
}
@@ -554,7 +560,7 @@ bool GCOVProfiler::runOnModule(
PreservedAnalyses GCOVProfilerPass::run(Module &M,
ModuleAnalysisManager &AM) {
- GCOVProfiler Profiler(GCOVOpts);
+ GCOVProfiler Profiler(GCOVOpts, *VFS);
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -789,7 +795,7 @@ bool GCOVProfiler::emitProfileNotes(
// Add the function line number to the lines of the entry block
// to have a counter for the function definition.
uint32_t Line = SP->getLine();
- auto Filename = getFilename(SP);
+ auto Filename = getFilename(SP, VFS);
BranchProbabilityInfo *BPI = GetBPI(F);
BlockFrequencyInfo *BFI = GetBFI(F);
@@ -881,7 +887,7 @@ bool GCOVProfiler::emitProfileNotes(
if (SP != getDISubprogram(Scope))
continue;
- GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope()));
+ GCOVLines &Lines = Block.getFile(getFilename(Loc->getScope(), VFS));
Lines.addLine(Loc.getLine());
}
Line = 0;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 065622e..c547662 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1100,7 +1100,9 @@ class BinOpSameOpcodeHelper {
// constant + x cannot be -constant - x
// instead, it should be x - -constant
if (Pos == 1 ||
- (FromOpcode == Instruction::Add && ToOpcode == Instruction::Sub))
+ ((FromOpcode == Instruction::Add || FromOpcode == Instruction::Or ||
+ FromOpcode == Instruction::Xor) &&
+ ToOpcode == Instruction::Sub))
return SmallVector<Value *>({LHS, RHS});
return SmallVector<Value *>({RHS, LHS});
}
@@ -1188,6 +1190,10 @@ public:
if (CIValue.isAllOnes())
InterchangeableMask = CanBeAll;
break;
+ case Instruction::Xor:
+ if (CIValue.isZero())
+ InterchangeableMask = XorBIT | OrBIT | AndBIT | SubBIT | AddBIT;
+ break;
default:
if (CIValue.isZero())
InterchangeableMask = CanBeAll;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
index bec3349..3590c4d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll
@@ -62,15 +62,12 @@ define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1)
define <4 x float> @freeze_insertps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: freeze_insertps:
; SSE: # %bb.0:
-; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE-NEXT: insertps {{.*#+}} xmm1 = xmm0[1],xmm1[1,2,3]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: freeze_insertps:
; AVX: # %bb.0:
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[1],xmm1[1,2,3]
+; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
%s0 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 16)
%f0 = freeze <4 x float> %s0
diff --git a/llvm/test/Transforms/InstCombine/preserve-profile.ll b/llvm/test/Transforms/InstCombine/preserve-profile.ll
index dd83805..8cb3e68 100644
--- a/llvm/test/Transforms/InstCombine/preserve-profile.ll
+++ b/llvm/test/Transforms/InstCombine/preserve-profile.ll
@@ -46,9 +46,59 @@ define i32 @NegBin(i1 %C) !prof !0 {
ret i32 %V
}
+define i32 @select_C_minus_1_or_C_from_bool(i1 %x) !prof !0 {
+; CHECK-LABEL: define i32 @select_C_minus_1_or_C_from_bool(
+; CHECK-SAME: i1 [[X:%.*]]) !prof [[PROF0]] {
+; CHECK-NEXT: [[ADD:%.*]] = select i1 [[X]], i32 41, i32 42, !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %ext = sext i1 %x to i32
+ %add = add i32 %ext, 42
+ ret i32 %add
+}
+
+define i5 @and_add(i1 %x, i1 %y) !prof !0 {
+; CHECK-LABEL: define i5 @and_add(
+; CHECK-SAME: i1 [[X:%.*]], i1 [[Y:%.*]]) !prof [[PROF0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X]], true
+; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[Y]], [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP2]], i5 -2, i5 0, !prof [[PROF2]]
+; CHECK-NEXT: ret i5 [[R]]
+;
+ %xz = zext i1 %x to i5
+ %ys = sext i1 %y to i5
+ %sub = add i5 %xz, %ys
+ %r = and i5 %sub, 30
+ ret i5 %r
+}
+
+define i32 @add_zext_zext_i1(i1 %a) !prof !0 {
+; CHECK-LABEL: define i32 @add_zext_zext_i1(
+; CHECK-SAME: i1 [[A:%.*]]) !prof [[PROF0]] {
+; CHECK-NEXT: [[ADD:%.*]] = select i1 [[A]], i32 2, i32 0, !prof [[PROF2]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %zext = zext i1 %a to i32
+ %add = add i32 %zext, %zext
+ ret i32 %add
+}
+
+define i32 @no_count_no_branch_weights(i1 %a) {
+; CHECK-LABEL: define i32 @no_count_no_branch_weights(
+; CHECK-SAME: i1 [[A:%.*]]) {
+; CHECK-NEXT: [[ADD:%.*]] = select i1 [[A]], i32 2, i32 0
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %zext = zext i1 %a to i32
+ %add = add i32 %zext, %zext
+ ret i32 %add
+}
+
+
!0 = !{!"function_entry_count", i64 1000}
!1 = !{!"branch_weights", i32 2, i32 3}
;.
; CHECK: [[PROF0]] = !{!"function_entry_count", i64 1000}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+; CHECK: [[PROF2]] = !{!"unknown", !"instcombine"}
;.
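
The folds in these tests come straight from i1 extension arithmetic: sext i1 true is -1 and zext i1 true is 1, so sext plus 42 becomes select x, 41, 42, and zext plus zext becomes select a, 2, 0. The new [[PROF2]] metadata records that InstCombine could not derive exact branch weights for the synthesized selects. A standalone check of the arithmetic (illustrative only):

    #include <cassert>
    #include <cstdint>

    int32_t viaSext(bool x) { return int32_t(x ? -1 : 0) + 42; } // sext i1 + add 42
    int32_t folded(bool x) { return x ? 41 : 42; }               // select form

    int main() {
      for (bool x : {false, true})
        assert(viaSext(x) == folded(x));
      return 0;
    }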
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 8e9cb23..75420d4 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
; RUN: opt -p loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
declare void @llvm.assume(i1)
@@ -47,29 +47,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
br label %loop.header
@@ -123,27 +102,8 @@ define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ]
@@ -216,29 +176,8 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 2) ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
br label %loop.header
@@ -312,29 +251,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
br label %loop.header
@@ -408,29 +326,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
br label %loop.header
@@ -504,29 +401,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
br label %loop.header
@@ -596,29 +472,8 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
br label %loop.header
@@ -692,29 +547,8 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef %
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
br label %loop.header
@@ -747,7 +581,7 @@ exit:
define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %N) nofree nosync{
; CHECK-LABEL: define void @deref_assumption_in_header_variable_trip_count(
; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -792,30 +626,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef
; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
;
entry:
br label %loop.header
@@ -867,28 +679,8 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noali
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ]
@@ -958,28 +750,8 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 1
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ]
@@ -1031,28 +803,8 @@ define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ]
@@ -1105,28 +857,8 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ]
@@ -1196,28 +928,8 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 4000) ]
@@ -1287,28 +999,8 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %a, i64 3999) ]
@@ -1376,27 +1068,8 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4) ]
@@ -1465,27 +1138,8 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
%a = call ptr @get_ptr()
@@ -1530,10 +1184,10 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD1]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
+; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
@@ -1542,7 +1196,7 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
@@ -1551,36 +1205,16 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context(p
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP13]], <2 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ]
@@ -1621,19 +1255,14 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]]
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4), "dereferenceable"(ptr [[TMP1]], i64 4) ]
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP2]], i64 4), "dereferenceable"(ptr [[TMP2]], i64 4) ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP17]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -1642,7 +1271,8 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
@@ -1652,33 +1282,11 @@ define void @deref_assumption_in_header_constant_trip_count_may_free(ptr noalias
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP15]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br label %[[EXIT:.*]]
-; CHECK: [[SCALAR_PH:.*]]:
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
-; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[GEP_A]], i64 4), "dereferenceable"(ptr [[GEP_A]], i64 4) ]
-; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
-; CHECK-NEXT: [[L_B:%.*]] = load i32, ptr [[GEP_B]], align 4
-; CHECK-NEXT: [[C_1:%.*]] = icmp sge i32 [[L_B]], 0
-; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[LOOP_THEN:.*]]
-; CHECK: [[LOOP_THEN]]:
-; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4
-; CHECK-NEXT: br label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_A]], %[[LOOP_THEN]] ], [ [[L_B]], %[[LOOP_HEADER]] ]
-; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
-; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_C]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
;
entry:
call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ]
@@ -1688,7 +1296,6 @@ entry:
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.a = getelementptr i32, ptr %a, i64 %iv
- call void @llvm.assume(i1 true) [ "align"(ptr %gep.a, i64 4), "dereferenceable"(ptr %gep.a, i64 4) ]
%gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
%l.b = load i32, ptr %gep.b, align 4
%c.1 = icmp sge i32 %l.b, 0
@@ -1709,27 +1316,163 @@ loop.latch:
exit:
ret void
}
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
-; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
-; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
-; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
-; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
-; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
-; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]], [[META2]]}
-; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
-; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]}
-; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
-; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]}
-; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
-; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]}
-;.
+
+define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_but_missing_nosync(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) {
+; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_but_missing_nosync(
+; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP13]], <2 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr i32, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 4
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i32, ptr %gep.a, align 4
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predecessors(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i1 %pre) nosync {
+; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_multiple_loop_predecessors(
+; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i1 [[PRE:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ]
+; CHECK-NEXT: br i1 [[PRE]], label %[[THEN:.*]], label %[[ELSE:.*]]
+; CHECK: [[THEN]]:
+; CHECK-NEXT: store i32 0, ptr [[A]], align 4
+; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: store i32 0, ptr [[B]], align 4
+; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER]]
+; CHECK: [[LOOP_HEADER_PREHEADER]]:
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
+; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP13]], <2 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH:.*:]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "align"(ptr %a, i64 4), "dereferenceable"(ptr %a, i64 4000) ]
+ br i1 %pre, label %then, label %else
+
+then:
+ store i32 0, ptr %a
+ br label %loop.header
+
+else:
+ store i32 0, ptr %b
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %then ], [ 0, %else ], [ %iv.next, %loop.latch ]
+ %gep.a = getelementptr i32, ptr %a, i64 %iv
+ %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv
+ %l.b = load i32, ptr %gep.b, align 4
+ %c.1 = icmp sge i32 %l.b, 0
+ br i1 %c.1, label %loop.latch, label %loop.then
+
+loop.then:
+ %l.a = load i32, ptr %gep.a, align 4
+ br label %loop.latch
+
+loop.latch:
+ %merge = phi i32 [ %l.a, %loop.then ], [ %l.b, %loop.header ]
+ %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv
+ store i32 %merge, ptr %gep.c, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, 1000
+ br i1 %ec, label %exit, label %loop.header
+
+exit:
+ ret void
+}
+
+
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll b/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll
index 7664fda..9cdcdf1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/xor-combined-opcode.ll
@@ -6,7 +6,7 @@ define i1 @foo(i1 %v) { ; assume %v is 1
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> poison, i1 [[V]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[TMP0]], <2 x i1> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i1> <i1 false, i1 true>, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> zeroinitializer, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: [[SUB:%.*]] = sub i1 [[TMP3]], [[TMP4]]
diff --git a/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml b/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml
index 5312c25..17e91f1 100644
--- a/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml
+++ b/llvm/test/tools/llvm-dwarfdump/verify_stmt_seq.yaml
@@ -2,9 +2,9 @@
# Then manually tampered with some of the values of the attribute
# I hope there are easier ways to construct tests like this.
-# RUN: yaml2obj %s -o verify_stmt_seq.o
-# RUN: not llvm-dwarfdump -verify -debug-info verify_stmt_seq.o | FileCheck %s --check-prefix=CHECK_INVALID --implicit-check-not=error:
-# RUN: llvm-dwarfdump -debug-line -verbose -debug-info verify_stmt_seq.o | FileCheck %s --check-prefix=CHECK_DEBUG_LINE
+# RUN: yaml2obj %s -o %t.o
+# RUN: not llvm-dwarfdump -verify -debug-info %t.o | FileCheck %s --check-prefix=CHECK_INVALID --implicit-check-not=error:
+# RUN: llvm-dwarfdump -debug-line -verbose -debug-info %t.o | FileCheck %s --check-prefix=CHECK_DEBUG_LINE
# CHECK_INVALID: error: DW_AT_LLVM_stmt_sequence offset 0x00000000 is not within the line table bounds [0x00000034, 0x000000fd)
# CHECK_INVALID: DW_AT_LLVM_stmt_sequence [DW_FORM_sec_offset] (0x00000000)
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading-fail.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading-fail.test
new file mode 100644
index 0000000..391b7ee
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading-fail.test
@@ -0,0 +1,26 @@
+## Test that --offloading with a fatbin works correctly.
+# REQUIRES: amdgpu-registered-target
+
+# RUN: yaml2obj %s -o %t.elf
+# RUN: llvm-readobj --offloading %t.elf 2>&1 | \
+# RUN: FileCheck %s --check-prefix=WARN -DFILE_NAME=%t.elf
+
+# WARN: warning: '{{.*}}': Stream Error: The stream is too short to perform the requested operation.
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+Sections:
+ - Name: .hip_fatbin
+ Type: SHT_PROGBITS
+ AddressAlign: 0x1000
+ Content: 5F5F434C414E475F4F46464C4F41445F42554E444C455F5F0200000000000000001000000000000000000000000000001B0000000000000075782D2D0010000000000000D00F0000000000001F0000000000000068697076342D616D6467636E2D616D642D616D646873612D2D67667839303800000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007F454C460201014003000000000000000300E0000100000000000000000000004000000000000000100C0000000000003005000040003800090040000F000D000600000004000000400000000000000040000000000000004000000000000000F801000000000000F80100000000000008000000000000000100000004000000000000000000000000000000000000000000000000000000C008000000000000C008000000000000001000000000000001000000050000000009000000000000001900000000000000190000000000006C000000000000006C00000000000000001000000000000001000000060000007009000000000000702900000000000070290000000000007000000000000000900600000000000000100000000000000100000006000000E009000000000000E039000000000000E039000000000000000000000000000001000000000000000010000000000000020000000600000070090000000000007029000000000000702900000000000070000000000000007000000000000000080000000000000052E574640400000070090000000000007029000000000000702900000000000070000000000000009006000000000000010000000000000051E57464060000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000004000000380200000000000038020000000000003802000000000000340500000000000034050000000000000400000000000000070000001D05000020000000414D44475055000083AE616D646873612E6B65726E656C7391DE0012AB2E616770725F636F756E7400A52E61726773DC001085AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA415F642E636F65726365A72E6F666673657400A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657285AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA425F642E636F65726365A72E6F666673657408A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657284A52E6E616D65A14EA72E6F666673657410A52E73697A6508AB2E76616C75655F6B696E64A862795F76616C756583A72E6F666673657418A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7883A72E6F66667365741CA52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7983A72E6F666673657420A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7A83A72E6F666673657424A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7883A72E6F666673657426A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7983A72E6F666673657428A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7A83A72E6F66667365742AA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7883A72E6F66667365742CA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7983A72E6F66667365742EA52E73697A6502AB2E7661
6C75655F6B696E64B268696464656E5F72656D61696E6465725F7A83A72E6F666673657440A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7883A72E6F666673657448A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7983A72E6F666673657450A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7A83A72E6F666673657458A52E73697A6502AB2E76616C75655F6B696E64B068696464656E5F677269645F64696D73B92E67726F75705F7365676D656E745F66697865645F73697A6500B62E6B65726E6172675F7365676D656E745F616C69676E08B52E6B65726E6172675F7365676D656E745F73697A65CD0118A92E6C616E6775616765A84F70656E434C2043B12E6C616E67756167655F76657273696F6E920200B82E6D61785F666C61745F776F726B67726F75705F73697A65CD0400A52E6E616D65B25F5A3973696D706C65416464506A504B6A6DBB2E707269766174655F7365676D656E745F66697865645F73697A6500AB2E736770725F636F756E740CB12E736770725F7370696C6C5F636F756E7400A72E73796D626F6CB55F5A3973696D706C65416464506A504B6A6D2E6B64B82E756E69666F726D5F776F726B5F67726F75705F73697A6501B32E757365735F64796E616D69635F737461636BC2AB2E766770725F636F756E7404B12E766770725F7370696C6C5F636F756E7400AF2E7761766566726F6E745F73697A6540AD616D646873612E746172676574B9616D6467636E2D616D642D616D646873612D2D676678393038AE616D646873612E76657273696F6E92010200000000000000000000000000000000000000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E03900000000000001000000000000000100000001000000010000001A000000000008400000D20001000000360A4A7A5238A4D3F113F4DD04000000040000000200000001000000000000000300000000000000000000000000000000000000005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F623730363264386333326134613933330000000000000000000000000000000000000000000000000000000000000000000000180100000000000080100000000000000000000000000000000000000000000000000000000000004000AF008C000000090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C20102C02400000002000AC0000000008002027E7FC08CBF07FF0486FFFF0000060406920600006800008FD2820002000302067E0200043203030638008050DC02007F020102067E0000003203030238008050DC00007F03700F8CBF03050468008070DC00027F00000081BF00000000060000000000000070070000000000000B000000000000001800000000000000050000000000000020080000000000000A000000000000004600000000000000F5FEFF6F00000000D0070000000000000400000000000000F807000000000000000000000000000000000000000000004C696E6B65723A20414D44204C4C442031392E302E3000414D4420636C616E672076657273696F6E2031392E302E306769742028202032343231322063393630313665636534313337356462646438663037356266333762643666633333323230376233290000414D4420636C616E672076657273696F6E2031382E302E3067697420287373683A2F2F6765727269746769742F6C696768746E696E672F65632F6C6C766D2D70726F6A65637420616D642D6D61696E6C696E652D6F70656E20323431373620663935303039613166393032313232343865313036333964653837653635636163616338643961372900000000000000000000000000000000000000000000000000460000000002080070290000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E0390000000000000100000000000000002E6E6F7465002E64796E73796D002E676E752E68617368002E68617368002E64796E737472002E726F64617461002E74657874002E64796E616D6963002E72656C726F5F70616464696E67002E627373002E636F6D6D656E74002E73796D746162002E7368737472746162002E73747274616200005F5A3973696D706C65416464506A504B6A6D005F5A3973696D7
06C65416464506A504B6A6D2E6B64005F5F6869705F637569645F62373036326438633332613461393333005F44594E414D494300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000070000000200000000000000380200000000000038020000000000003405000000000000000000000000000004000000000000000000000000000000070000000B00000002000000000000007007000000000000700700000000000060000000000000000500000001000000080000000000000018000000000000000F000000F6FFFF6F0200000000000000D007000000000000D007000000000000280000000000000002000000000000000800000000000000000000000000000019000000050000000200000000000000F807000000000000F80700000000000028000000000000000200000000000000040000000000000004000000000000001F000000030000000200000000000000200800000000000020080000000000004600000000000000000000000000000001000000000000000000000000000000270000000100000002000000000000008008000000000000800800000000000040000000000000000000000000000000400000000000000000000000000000002F000000010000000600000000000000001900000000000000090000000000006C00000000000000000000000000000000010000000000000000000000000000350000000600000003000000000000007029000000000000700900000000000070000000000000000500000000000000080000000000000010000000000000003E000000080000000300000000000000E029000000000000E00900000000000020060000000000000000000000000000010000000000000000000000000000004D000000080000000300000000000000E039000000000000E0090000000000000100000000000000000000000000000001000000000000000000000000000000520000000100000030000000000000000000000000000000E009000000000000F0000000000000000000000000000000010000000000000001000000000000005B0000000200000000000000000000000000000000000000D00A00000000000078000000000000000E0000000200000008000000000000001800000000000000630000000300000000000000000000000000000000000000480B00000000000075000000000000000000000000000000010000000000000000000000000000006D0000000300000000000000000000000000000000000000BD0B0000000000004F00000000000000000000000000000001000000000000000000000000000000
+ - Name: .hipFatBinSegment
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x202FD0
+ AddressAlign: 0x8
+ Content: '465049480100000000102000000000000000000000000000'
+...
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading.test
new file mode 100644
index 0000000..21ee60d
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/offloading.test
@@ -0,0 +1,27 @@
+## Test that --offloading with a fatbin works correctly.
+# REQUIRES: amdgpu-registered-target
+
+# RUN: yaml2obj %s -o %t.elf
+# RUN: llvm-readobj --offloading %t.elf | \
+# RUN: FileCheck %s -DFILE_NAME=%t.elf
+
+# CHECK: host-x86_64-unknown-linux-- file://[[FILE_NAME]]#offset=8192&size=0
+# CHECK-NEXT: hipv4-amdgcn-amd-amdhsa--gfx908 file://[[FILE_NAME]]#offset=8192&size=4048
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+Sections:
+ - Name: .hip_fatbin
+ Type: SHT_PROGBITS
+ AddressAlign: 0x1000
+ Content: 5F5F434C414E475F4F46464C4F41445F42554E444C455F5F0200000000000000001000000000000000000000000000001B00000000000000686F73742D7838365F36342D756E6B6E6F776E2D6C696E75782D2D0010000000000000D00F0000000000001F0000000000000068697076342D616D6467636E2D616D642D616D646873612D2D6766783930380000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007F454C460201014003000000000000000300E0000100000000000000000000004000000000000000100C0000000000003005000040003800090040000F000D000600000004000000400000000000000040000000000000004000000000000000F801000000000000F80100000000000008000000000000000100000004000000000000000000000000000000000000000000000000000000C008000000000000C008000000000000001000000000000001000000050000000009000000000000001900000000000000190000000000006C000000000000006C00000000000000001000000000000001000000060000007009000000000000702900000000000070290000000000007000000000000000900600000000000000100000000000000100000006000000E009000000000000E039000000000000E039000000000000000000000000000001000000000000000010000000000000020000000600000070090000000000007029000000000000702900000000000070000000000000007000000000000000080000000000000052E574640400000070090000000000007029000000000000702900000000000070000000000000009006000000000000010000000000000051E57464060000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000400000004000000380200000000000038020000000000003802000000000000340500000000000034050000000000000400000000000000070000001D05000020000000414D44475055000083AE616D646873612E6B65726E656C7391DE0012AB2E616770725F636F756E7400A52E61726773DC001085AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA415F642E636F65726365A72E6F666673657400A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657285AE2E616464726573735F7370616365A6676C6F62616CA52E6E616D65AA425F642E636F65726365A72E6F666673657408A52E73697A6508AB2E76616C75655F6B696E64AD676C6F62616C5F62756666657284A52E6E616D65A14EA72E6F666673657410A52E73697A6508AB2E76616C75655F6B696E64A862795F76616C756583A72E6F666673657418A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7883A72E6F66667365741CA52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7983A72E6F666673657420A52E73697A6504AB2E76616C75655F6B696E64B468696464656E5F626C6F636B5F636F756E745F7A83A72E6F666673657424A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7883A72E6F666673657426A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7983A72E6F666673657428A52E73697A6502AB2E76616C75655F6B696E64B368696464656E5F67726F75705F73697A655F7A83A72E6F66667365742AA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7883A72E6F66667365742CA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E646572
5F7983A72E6F66667365742EA52E73697A6502AB2E76616C75655F6B696E64B268696464656E5F72656D61696E6465725F7A83A72E6F666673657440A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7883A72E6F666673657448A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7983A72E6F666673657450A52E73697A6508AB2E76616C75655F6B696E64B668696464656E5F676C6F62616C5F6F66667365745F7A83A72E6F666673657458A52E73697A6502AB2E76616C75655F6B696E64B068696464656E5F677269645F64696D73B92E67726F75705F7365676D656E745F66697865645F73697A6500B62E6B65726E6172675F7365676D656E745F616C69676E08B52E6B65726E6172675F7365676D656E745F73697A65CD0118A92E6C616E6775616765A84F70656E434C2043B12E6C616E67756167655F76657273696F6E920200B82E6D61785F666C61745F776F726B67726F75705F73697A65CD0400A52E6E616D65B25F5A3973696D706C65416464506A504B6A6DBB2E707269766174655F7365676D656E745F66697865645F73697A6500AB2E736770725F636F756E740CB12E736770725F7370696C6C5F636F756E7400A72E73796D626F6CB55F5A3973696D706C65416464506A504B6A6D2E6B64B82E756E69666F726D5F776F726B5F67726F75705F73697A6501B32E757365735F64796E616D69635F737461636BC2AB2E766770725F636F756E7404B12E766770725F7370696C6C5F636F756E7400AF2E7761766566726F6E745F73697A6540AD616D646873612E746172676574B9616D6467636E2D616D642D616D646873612D2D676678393038AE616D646873612E76657273696F6E92010200000000000000000000000000000000000000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E03900000000000001000000000000000100000001000000010000001A000000000008400000D20001000000360A4A7A5238A4D3F113F4DD04000000040000000200000001000000000000000300000000000000000000000000000000000000005F5A3973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F623730363264386333326134613933330000000000000000000000000000000000000000000000000000000000000000000000180100000000000080100000000000000000000000000000000000000000000000000000000000004000AF008C000000090000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000C20102C02400000002000AC0000000008002027E7FC08CBF07FF0486FFFF0000060406920600006800008FD2820002000302067E0200043203030638008050DC02007F020102067E0000003203030238008050DC00007F03700F8CBF03050468008070DC00027F00000081BF00000000060000000000000070070000000000000B000000000000001800000000000000050000000000000020080000000000000A000000000000004600000000000000F5FEFF6F00000000D0070000000000000400000000000000F807000000000000000000000000000000000000000000004C696E6B65723A20414D44204C4C442031392E302E3000414D4420636C616E672076657273696F6E2031392E302E306769742028202032343231322063393630313665636534313337356462646438663037356266333762643666633333323230376233290000414D4420636C616E672076657273696F6E2031382E302E3067697420287373683A2F2F6765727269746769742F6C696768746E696E672F65632F6C6C766D2D70726F6A65637420616D642D6D61696E6C696E652D6F70656E20323431373620663935303039613166393032313232343865313036333964653837653635636163616338643961372900000000000000000000000000000000000000000000000000460000000002080070290000000000000000000000000000010000001203070000190000000000006C000000000000001400000011030600800800000000000040000000000000002A00000011000A00E0390000000000000100000000000000002E6E6F7465002E64796E73796D002E676E752E68617368002E68617368002E64796E737472002E726F64617461002E74657874002E64796E616D6963002E72656C726F5F70616464696E67002E627373002E636F6D6D656E74002E73796D746162002E7368737472746162002E73747274616200005F5A3
973696D706C65416464506A504B6A6D005F5A3973696D706C65416464506A504B6A6D2E6B64005F5F6869705F637569645F62373036326438633332613461393333005F44594E414D494300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000070000000200000000000000380200000000000038020000000000003405000000000000000000000000000004000000000000000000000000000000070000000B00000002000000000000007007000000000000700700000000000060000000000000000500000001000000080000000000000018000000000000000F000000F6FFFF6F0200000000000000D007000000000000D007000000000000280000000000000002000000000000000800000000000000000000000000000019000000050000000200000000000000F807000000000000F80700000000000028000000000000000200000000000000040000000000000004000000000000001F000000030000000200000000000000200800000000000020080000000000004600000000000000000000000000000001000000000000000000000000000000270000000100000002000000000000008008000000000000800800000000000040000000000000000000000000000000400000000000000000000000000000002F000000010000000600000000000000001900000000000000090000000000006C00000000000000000000000000000000010000000000000000000000000000350000000600000003000000000000007029000000000000700900000000000070000000000000000500000000000000080000000000000010000000000000003E000000080000000300000000000000E029000000000000E00900000000000020060000000000000000000000000000010000000000000000000000000000004D000000080000000300000000000000E039000000000000E0090000000000000100000000000000000000000000000001000000000000000000000000000000520000000100000030000000000000000000000000000000E009000000000000F0000000000000000000000000000000010000000000000001000000000000005B0000000200000000000000000000000000000000000000D00A00000000000078000000000000000E0000000200000008000000000000001800000000000000630000000300000000000000000000000000000000000000480B00000000000075000000000000000000000000000000010000000000000000000000000000006D0000000300000000000000000000000000000000000000BD0B0000000000004F00000000000000000000000000000001000000000000000000000000000000
+ - Name: .hipFatBinSegment
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC ]
+ Address: 0x202FD0
+ AddressAlign: 0x8
+ Content: '465049480100000000102000000000000000000000000000'
+...
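Taken together, the two CHECK lines above decode the `__CLANG_OFFLOAD_BUNDLE__` header embedded in `.hip_fatbin`: the `host-x86_64-unknown-linux--` entry has size 0 (no host object is embedded in the bundle), while the `hipv4-amdgcn-amd-amdhsa--gfx908` entry covers the 4048-byte gfx908 code object, and both are reported as `file://` URIs giving a byte offset and size into the dumped file.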
diff --git a/llvm/tools/llvm-readobj/ObjDumper.cpp b/llvm/tools/llvm-readobj/ObjDumper.cpp
index bd670ae..0b59dd4 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.cpp
+++ b/llvm/tools/llvm-readobj/ObjDumper.cpp
@@ -16,6 +16,8 @@
#include "llvm/Object/Archive.h"
#include "llvm/Object/Decompressor.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/OffloadBinary.h"
+#include "llvm/Object/OffloadBundle.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -230,4 +232,14 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj,
}
}
+void ObjDumper::printOffloading(const object::ObjectFile &Obj) {
+ SmallVector<llvm::object::OffloadBundleFatBin> Bundles;
+ if (Error Err = object::extractOffloadBundleFatBinary(Obj, Bundles))
+ reportWarning(std::move(Err), Obj.getFileName());
+
+ // Print all the fatbin bundles contained in this object file.
+ for (const auto &[Index, Bundle] : llvm::enumerate(Bundles))
+ Bundle.printEntriesAsURI();
+}
+
} // namespace llvm
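The new hook is a thin wrapper over the OffloadBundle API. Below is a minimal standalone sketch of the same flow; the helper name is hypothetical, and error handling is simplified relative to the dumper above, which warns and continues:

```c++
#include "llvm/ADT/SmallVector.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/OffloadBundle.h"
#include "llvm/Support/Error.h"

// Hypothetical free function mirroring ObjDumper::printOffloading().
static void dumpOffloadURIs(const llvm::object::ObjectFile &Obj) {
  llvm::SmallVector<llvm::object::OffloadBundleFatBin> Bundles;
  if (llvm::Error Err =
          llvm::object::extractOffloadBundleFatBinary(Obj, Bundles)) {
    llvm::consumeError(std::move(Err)); // a real caller should report this
    return;
  }
  // Each bundle prints its entries as "<triple> file://<path>#offset=N&size=M".
  for (const llvm::object::OffloadBundleFatBin &Bundle : Bundles)
    Bundle.printEntriesAsURI();
}
```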
diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h
index a654078..d264394 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/llvm/tools/llvm-readobj/ObjDumper.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/OffloadBinary.h"
#include "llvm/Support/CommandLine.h"
#include <unordered_set>
@@ -188,6 +189,7 @@ public:
std::function<Error(const Twine &Msg)> WarningHandler;
void reportUniqueWarning(Error Err) const;
void reportUniqueWarning(const Twine &Msg) const;
+ void printOffloading(const object::ObjectFile &Obj);
protected:
ScopedPrinter &W;
diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td
index 711522c..97d5d7f 100644
--- a/llvm/tools/llvm-readobj/Opts.td
+++ b/llvm/tools/llvm-readobj/Opts.td
@@ -32,6 +32,7 @@ def file_header : FF<"file-header", "Display file header">;
def headers : FF<"headers", "Equivalent to setting: --file-header, --program-headers, --section-headers">;
defm hex_dump : Eq<"hex-dump", "Display the specified section(s) as hexadecimal bytes">, MetaVarName<"<name or index>">;
def pretty_print : FF<"pretty-print", "Pretty print JSON output">;
+def offloading : FF<"offloading", "Display the content of the offloading section">;
def relocs : FF<"relocs", "Display the relocation entries in the file">;
def section_data : FF<"section-data", "Display section data for each section shown. This option has no effect for GNU style output">;
def section_details : FF<"section-details", "Display the section details">;
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index 2b34761..5327731 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -135,6 +135,7 @@ static bool HashHistogram;
static bool Memtag;
static bool NeededLibraries;
static bool Notes;
+static bool Offloading;
static bool ProgramHeaders;
static bool SectionGroups;
static std::vector<std::string> SFrame;
@@ -274,6 +275,7 @@ static void parseOptions(const opt::InputArgList &Args) {
opts::Memtag = Args.hasArg(OPT_memtag);
opts::NeededLibraries = Args.hasArg(OPT_needed_libs);
opts::Notes = Args.hasArg(OPT_notes);
+ opts::Offloading = Args.hasArg(OPT_offloading);
opts::PrettyPrint = Args.hasArg(OPT_pretty_print);
opts::ProgramHeaders = Args.hasArg(OPT_program_headers);
opts::SectionGroups = Args.hasArg(OPT_section_groups);
@@ -459,6 +461,8 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
Dumper->printGnuHashTable();
if (opts::VersionInfo)
Dumper->printVersionInfo();
+ if (opts::Offloading)
+ Dumper->printOffloading(Obj);
if (opts::StringTable)
Dumper->printStringTable();
if (Obj.isELF()) {
@@ -707,6 +711,7 @@ int llvm_readobj_main(int argc, char **argv, const llvm::ToolContext &) {
opts::DynamicTable = true;
opts::Notes = true;
opts::VersionInfo = true;
+ opts::Offloading = true;
opts::UnwindInfo = true;
opts::SectionGroups = true;
opts::HashHistogram = true;
diff --git a/llvm/unittests/CAS/CMakeLists.txt b/llvm/unittests/CAS/CMakeLists.txt
index ab709e3..0f8fcb9 100644
--- a/llvm/unittests/CAS/CMakeLists.txt
+++ b/llvm/unittests/CAS/CMakeLists.txt
@@ -1,7 +1,3 @@
-if (LLVM_ENABLE_ONDISK_CAS)
- add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1)
-endif()
-
set(LLVM_LINK_COMPONENTS
Support
CAS
@@ -12,6 +8,7 @@ add_llvm_unittest(CASTests
ActionCacheTest.cpp
CASTestConfig.cpp
ObjectStoreTest.cpp
+ OnDiskTrieRawHashMapTest.cpp
ProgramTest.cpp
)
diff --git a/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp
new file mode 100644
index 0000000..7bedfe4
--- /dev/null
+++ b/llvm/unittests/CAS/OnDiskTrieRawHashMapTest.cpp
@@ -0,0 +1,220 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CAS/OnDiskTrieRawHashMap.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Testing/Support/Error.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+#if LLVM_ENABLE_ONDISK_CAS
+using namespace llvm;
+using namespace llvm::cas;
+
+namespace {
+
+struct OnDiskTrieRawHashMapTestFixture
+ : public ::testing::TestWithParam<size_t> {
+ static constexpr size_t MB = 1024u * 1024u;
+ static constexpr size_t DataSize = 8; // Multiple of 8B.
+
+ std::optional<unittest::TempDir> Temp;
+ size_t NumHashBytes;
+
+ void SetUp() override {
+ Temp.emplace("trie-raw-hash-map", /*Unique=*/true);
+ NumHashBytes = GetParam();
+ }
+ void TearDown() override { Temp.reset(); }
+
+ Expected<OnDiskTrieRawHashMap> createTrie() {
+ size_t NumHashBits = NumHashBytes * 8;
+ return OnDiskTrieRawHashMap::create(
+ Temp->path((Twine(NumHashBytes) + "B").str()), "index",
+ /*NumHashBits=*/NumHashBits, DataSize, /*MaxFileSize=*/MB,
+ /*NewInitialFileSize=*/std::nullopt);
+ }
+};
+
+// Create tries with various sizes of hash and with data.
+TEST_P(OnDiskTrieRawHashMapTestFixture, General) {
+ std::optional<OnDiskTrieRawHashMap> Trie1;
+ ASSERT_THAT_ERROR(createTrie().moveInto(Trie1), Succeeded());
+ std::optional<OnDiskTrieRawHashMap> Trie2;
+ ASSERT_THAT_ERROR(createTrie().moveInto(Trie2), Succeeded());
+
+ uint8_t Hash0Bytes[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+ uint8_t Hash1Bytes[8] = {1, 0, 0, 0, 0, 0, 0, 0};
+ auto Hash0 = ArrayRef(Hash0Bytes).take_front(NumHashBytes);
+ auto Hash1 = ArrayRef(Hash1Bytes).take_front(NumHashBytes);
+ constexpr StringLiteral Data0v1Bytes = "data0.v1";
+ constexpr StringLiteral Data0v2Bytes = "data0.v2";
+ constexpr StringLiteral Data1Bytes = "data1...";
+ static_assert(Data0v1Bytes.size() == DataSize, "math error");
+ static_assert(Data0v2Bytes.size() == DataSize, "math error");
+ static_assert(Data1Bytes.size() == DataSize, "math error");
+ ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size());
+ ArrayRef<char> Data0v2 = ArrayRef(Data0v2Bytes.data(), Data0v2Bytes.size());
+ ArrayRef<char> Data1 = ArrayRef(Data1Bytes.data(), Data1Bytes.size());
+
+ // Lookup when trie is empty.
+ EXPECT_FALSE(Trie1->find(Hash0));
+
+ // Insert.
+ std::optional<FileOffset> Offset;
+ std::optional<MutableArrayRef<char>> Data;
+ {
+ std::optional<OnDiskTrieRawHashMap::pointer> Insertion;
+ ASSERT_THAT_ERROR(Trie1->insert({Hash0, Data0v1}).moveInto(Insertion),
+ Succeeded());
+ EXPECT_EQ(Hash0, (*Insertion)->Hash);
+ EXPECT_EQ(Data0v1, (*Insertion)->Data);
+ EXPECT_TRUE(isAddrAligned(Align(8), (*Insertion)->Data.data()));
+
+ Offset = Insertion->getOffset();
+ Data = (*Insertion)->Data;
+ }
+
+ // Find.
+ {
+ auto Lookup = Trie1->find(Hash0);
+ ASSERT_TRUE(Lookup);
+ EXPECT_EQ(Hash0, Lookup->Hash);
+ EXPECT_EQ(Data0v1, Lookup->Data);
+ EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
+ }
+
+ // Find in a different instance of the same on-disk trie that existed
+ // before the insertion.
+ {
+ auto Lookup = Trie2->find(Hash0);
+ ASSERT_TRUE(Lookup);
+ EXPECT_EQ(Hash0, Lookup->Hash);
+ EXPECT_EQ(Data0v1, Lookup->Data);
+ EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
+ }
+
+ // Create a new instance and check that too.
+ Trie2.reset();
+ ASSERT_THAT_ERROR(createTrie().moveInto(Trie2), Succeeded());
+ {
+ auto Lookup = Trie2->find(Hash0);
+ ASSERT_TRUE(Lookup);
+ EXPECT_EQ(Hash0, Lookup->Hash);
+ EXPECT_EQ(Data0v1, Lookup->Data);
+ EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
+ }
+
+ // Change the data.
+ llvm::copy(Data0v2, Data->data());
+ {
+ auto Lookup = Trie2->find(Hash0);
+ ASSERT_TRUE(Lookup);
+ EXPECT_EQ(Hash0, Lookup->Hash);
+ EXPECT_EQ(Data0v2, Lookup->Data);
+ EXPECT_EQ(Offset->get(), Lookup.getOffset().get());
+ }
+
+ // Find different hash.
+ EXPECT_FALSE(Trie1->find(Hash1));
+ EXPECT_FALSE(Trie2->find(Hash1));
+
+ // Recover from an offset.
+ {
+ OnDiskTrieRawHashMap::const_pointer Recovered;
+ ASSERT_THAT_ERROR(Trie1->recoverFromFileOffset(*Offset).moveInto(Recovered),
+ Succeeded());
+ ASSERT_TRUE(Recovered);
+ EXPECT_EQ(Offset->get(), Recovered.getOffset().get());
+ EXPECT_EQ(Hash0, Recovered->Hash);
+ EXPECT_EQ(Data0v2, Recovered->Data);
+ }
+
+ // Recover from a bad offset.
+ {
+ FileOffset BadOffset(1);
+ OnDiskTrieRawHashMap::const_pointer Recovered;
+ ASSERT_THAT_ERROR(
+ Trie1->recoverFromFileOffset(BadOffset).moveInto(Recovered), Failed());
+ }
+
+ // Insert another thing.
+ {
+ std::optional<OnDiskTrieRawHashMap::pointer> Insertion;
+ ASSERT_THAT_ERROR(Trie1->insert({Hash1, Data1}).moveInto(Insertion),
+ Succeeded());
+ EXPECT_EQ(Hash1, (*Insertion)->Hash);
+ EXPECT_EQ(Data1, (*Insertion)->Data);
+ EXPECT_TRUE(isAddrAligned(Align(8), (*Insertion)->Data.data()));
+
+ EXPECT_NE(Offset->get(), Insertion->getOffset().get());
+ }
+
+ // Validate.
+ {
+ auto RecordVerify =
+ [&](FileOffset Offset,
+ OnDiskTrieRawHashMap::ConstValueProxy Proxy) -> Error {
+ if (Proxy.Hash.size() != NumHashBytes)
+ return createStringError("wrong hash size");
+ if (Proxy.Data.size() != DataSize)
+ return createStringError("wrong data size");
+
+ return Error::success();
+ };
+ ASSERT_THAT_ERROR(Trie1->validate(RecordVerify), Succeeded());
+ ASSERT_THAT_ERROR(Trie2->validate(RecordVerify), Succeeded());
+ }
+
+ // Size and capacity.
+ {
+ EXPECT_EQ(Trie1->capacity(), MB);
+ EXPECT_EQ(Trie2->capacity(), MB);
+ EXPECT_LE(Trie1->size(), MB);
+ EXPECT_LE(Trie2->size(), MB);
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(OnDiskTrieRawHashMapTest,
+ OnDiskTrieRawHashMapTestFixture,
+ ::testing::Values(1, 2, 4, 8));
+
+TEST(OnDiskTrieRawHashMapTest, OutOfSpace) {
+ unittest::TempDir Temp("trie-raw-hash-map", /*Unique=*/true);
+ std::optional<OnDiskTrieRawHashMap> Trie;
+
+ // Too small to create header.
+ ASSERT_THAT_ERROR(OnDiskTrieRawHashMap::create(
+ Temp.path("NoSpace1").str(), "index",
+ /*NumHashBits=*/8, /*DataSize=*/8, /*MaxFileSize=*/8,
+ /*NewInitialFileSize=*/std::nullopt)
+ .moveInto(Trie),
+ Failed());
+
+ // Just enough for root node but not enough for any insertion.
+ ASSERT_THAT_ERROR(OnDiskTrieRawHashMap::create(
+ Temp.path("NoSpace2").str(), "index",
+ /*NumHashBits=*/8, /*DataSize=*/8, /*MaxFileSize=*/118,
+ /*NewInitialFileSize=*/std::nullopt,
+ /*NewTableNumRootBits=*/1, /*NewTableNumSubtrieBits=*/1)
+ .moveInto(Trie),
+ Succeeded());
+ uint8_t Hash0Bytes[1] = {0};
+ auto Hash0 = ArrayRef(Hash0Bytes);
+ constexpr StringLiteral Data0v1Bytes = "data0.v1";
+ ArrayRef<char> Data0v1 = ArrayRef(Data0v1Bytes.data(), Data0v1Bytes.size());
+ std::optional<OnDiskTrieRawHashMap::pointer> Insertion;
+ ASSERT_THAT_ERROR(Trie->insert({Hash0, Data0v1}).moveInto(Insertion),
+ Failed());
+}
+
+} // namespace
+
+#endif // LLVM_ENABLE_ONDISK_CAS
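Condensed from the parameterized test above, here is a minimal end-to-end sketch of the trie API. The path and sizes are illustrative only, and every `Expected` must be checked as shown:

```c++
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CAS/OnDiskTrieRawHashMap.h"
#include "llvm/Support/Error.h"

// Illustrative only: 64-bit hashes with 8-byte payloads, as in the test.
static llvm::Error demoTrie() {
  using namespace llvm;
  using namespace llvm::cas;

  auto Trie = OnDiskTrieRawHashMap::create(
      "/tmp/demo.trie", "index", /*NumHashBits=*/64, /*DataSize=*/8,
      /*MaxFileSize=*/1024 * 1024, /*NewInitialFileSize=*/std::nullopt);
  if (!Trie)
    return Trie.takeError();

  uint8_t Hash[8] = {1, 0, 0, 0, 0, 0, 0, 0};
  constexpr StringLiteral Payload = "data0.v1"; // exactly DataSize bytes
  auto Insertion =
      Trie->insert({ArrayRef(Hash), ArrayRef(Payload.data(), Payload.size())});
  if (!Insertion)
    return Insertion.takeError();

  // Lookups (including from another instance opened on the same file) now
  // see the record; (*Insertion)->Data is an 8-byte-aligned mutable view.
  if (!Trie->find(ArrayRef(Hash)))
    return createStringError("lookup failed");
  return Error::success();
}
```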
diff --git a/llvm/unittests/Support/MustacheTest.cpp b/llvm/unittests/Support/MustacheTest.cpp
index 02eaed4..0ebbc58 100644
--- a/llvm/unittests/Support/MustacheTest.cpp
+++ b/llvm/unittests/Support/MustacheTest.cpp
@@ -998,7 +998,7 @@ TEST(MustachePartials, StandaloneIndentation) {
std::string Out;
raw_string_ostream OS(Out);
T.render(D, OS);
- EXPECT_NE("\\\n |\n <\n ->\n |\n/\n", Out);
+ EXPECT_EQ("\\\n |\n <\n ->\n |\n/\n", Out);
}
TEST(MustacheLambdas, BasicInterpolation) {
@@ -1328,3 +1328,139 @@ TEST(MustacheTripleMustache, WithPadding) {
T.render(D, OS);
EXPECT_EQ("|---|", Out);
}
+
+TEST(MustacheDelimiters, PairBehavior) {
+ Value D = Object{{"text", "Hey!"}};
+ auto T = Template("{{=<% %>=}}(<%text%>)");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("(Hey!)", Out);
+}
+
+TEST(MustacheDelimiters, SpecialCharacters) {
+ Value D = Object{{"text", "It worked!"}};
+ auto T = Template("({{=[ ]=}}[text])");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("(It worked!)", Out);
+}
+
+TEST(MustacheDelimiters, Sections) {
+ Value D = Object{{"section", true}, {"data", "I got interpolated."}};
+ auto T =
+ Template("[\n{{#section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= "
+ "| | =}}\n|#section|\n {{data}}\n |data|\n|/section|\n]\n");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ "interpolated.\n]\n",
+ Out);
+}
+
+TEST(MustacheDelimiters, InvertedSections) {
+ Value D = Object{{"section", false}, {"data", "I got interpolated."}};
+ auto T =
+ Template("[\n{{^section}}\n {{data}}\n |data|\n{{/section}}\n\n{{= "
+ "| | =}}\n|^section|\n {{data}}\n |data|\n|/section|\n]\n");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("[\n I got interpolated.\n |data|\n\n {{data}}\n I got "
+ "interpolated.\n]\n",
+ Out);
+}
+
+TEST(MustacheDelimiters, PartialInheritence) {
+ Value D = Object{{"value", "yes"}};
+ auto T = Template("[ {{>include}} ]\n{{= | | =}}\n[ |>include| ]\n");
+ T.registerPartial("include", ".{{value}}.");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("[ .yes. ]\n[ .yes. ]\n", Out);
+}
+
+TEST(MustacheDelimiters, PostPartialBehavior) {
+ Value D = Object{{"value", "yes"}};
+ auto T = Template("[ {{>include}} ]\n[ .{{value}}. .|value|. ]\n");
+ T.registerPartial("include", ".{{value}}. {{= | | =}} .|value|.");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("[ .yes. .yes. ]\n[ .yes. .|value|. ]\n", Out);
+}
+
+TEST(MustacheDelimiters, SurroundingWhitespace) {
+ Value D = Object{};
+ auto T = Template("| {{=@ @=}} |");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_EQ("| |", Out);
+}
+
+TEST(MustacheDelimiters, OutlyingWhitespaceInline) {
+ Value D = Object{};
+ auto T = Template(" | {{=@ @=}}\n");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_EQ(" | \n", Out);
+}
+
+TEST(MustacheDelimiters, StandaloneTag) {
+ Value D = Object{};
+ auto T = Template("Begin.\n{{=@ @=}}\nEnd.\n");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("Begin.\nEnd.\n", Out);
+}
+
+TEST(MustacheDelimiters, IndentedStandaloneTag) {
+ Value D = Object{};
+ auto T = Template("Begin.\n {{=@ @=}}\nEnd.\n");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("Begin.\nEnd.\n", Out);
+}
+
+TEST(MustacheDelimiters, StandaloneLineEndings) {
+ Value D = Object{};
+ auto T = Template("|\r\n{{= @ @ =}}\r\n|");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("|\r\n|", Out);
+}
+
+TEST(MustacheDelimiters, StandaloneWithoutPreviousLine) {
+ Value D = Object{};
+ auto T = Template(" {{=@ @=}}\n=");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("=", Out);
+}
+
+TEST(MustacheDelimiters, StandaloneWithoutNewline) {
+ Value D = Object{};
+ auto T = Template("=\n {{=@ @=}}");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_NE("=\n", Out);
+}
+
+TEST(MustacheDelimiters, PairwithPadding) {
+ Value D = Object{};
+ auto T = Template("|{{= @ @ =}}|");
+ std::string Out;
+ raw_string_ostream OS(Out);
+ T.render(D, OS);
+ EXPECT_EQ("||", Out);
+}
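A note on the assertions in these new delimiter tests: following the convention visible in the StandaloneIndentation change above, EXPECT_EQ marks behavior that already matches the Mustache spec, while the EXPECT_NE cases appear to pin down spec-mandated outputs (delimiter changes inside sections, partials, and most standalone-tag forms) that the current implementation does not yet produce; such assertions can be flipped to EXPECT_EQ as conformance improves.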
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index a2b9e56..08c8944 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -838,8 +838,6 @@ Transforms/InstCombine/2011-02-14-InfLoop.ll
Transforms/InstCombine/AArch64/sve-intrinsic-sel.ll
Transforms/InstCombine/AArch64/sve-intrinsic-simplify-binop.ll
Transforms/InstCombine/AArch64/sve-intrinsic-simplify-shift.ll
-Transforms/InstCombine/add2.ll
-Transforms/InstCombine/add.ll
Transforms/InstCombine/add-mask.ll
Transforms/InstCombine/add-shl-mul-umax.ll
Transforms/InstCombine/add-shl-sdiv-to-srem.ll
diff --git a/mlir/docs/Tutorials/Toy/Ch-6.md b/mlir/docs/Tutorials/Toy/Ch-6.md
index 529de55..178c073 100644
--- a/mlir/docs/Tutorials/Toy/Ch-6.md
+++ b/mlir/docs/Tutorials/Toy/Ch-6.md
@@ -245,7 +245,7 @@ define void @main()
```
The full code listing for dumping LLVM IR can be found in
-`examples/toy/Ch6/toy.cpp` in the `dumpLLVMIR()` function:
+`examples/toy/Ch6/toyc.cpp` in the `dumpLLVMIR()` function:
```c++
diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
index 262e0e7..cc6314c 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -48,8 +48,8 @@ static bool isStaticStrideOrOffset(int64_t strideOrOffset) {
}
static FailureOr<LLVM::LLVMFuncOp>
-getFreeFn(OpBuilder &b, const LLVMTypeConverter *typeConverter, ModuleOp module,
- SymbolTableCollection *symbolTables) {
+getFreeFn(OpBuilder &b, const LLVMTypeConverter *typeConverter,
+ Operation *module, SymbolTableCollection *symbolTables) {
bool useGenericFn = typeConverter->getOptions().useGenericFunctions;
if (useGenericFn)
@@ -483,8 +483,8 @@ public:
ConversionPatternRewriter &rewriter) const override {
// Insert the `free` declaration if it is not already present.
FailureOr<LLVM::LLVMFuncOp> freeFunc =
- getFreeFn(rewriter, getTypeConverter(), op->getParentOfType<ModuleOp>(),
- symbolTables);
+ getFreeFn(rewriter, getTypeConverter(),
+ op->getParentWithTrait<OpTrait::SymbolTable>(), symbolTables);
if (failed(freeFunc))
return failure();
Value allocatedPtr;
diff --git a/mlir/test/Dialect/GPU/memref-to-llvm.mlir b/mlir/test/Dialect/GPU/memref-to-llvm.mlir
new file mode 100644
index 0000000..81a96bf
--- /dev/null
+++ b/mlir/test/Dialect/GPU/memref-to-llvm.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-opt --convert-to-llvm %s | FileCheck %s
+
+// Checking that malloc and free are declared in the proper module.
+
+// CHECK: module attributes {gpu.container_module} {
+// CHECK: llvm.func @free(!llvm.ptr)
+// CHECK: llvm.func @malloc(i64) -> !llvm.ptr
+// CHECK: gpu.module @kernels {
+// CHECK: llvm.func @free(!llvm.ptr)
+// CHECK: llvm.func @malloc(i64) -> !llvm.ptr
+// CHECK: gpu.func @kernel_1
+// CHECK: llvm.call @malloc({{.*}}) : (i64) -> !llvm.ptr
+// CHECK: llvm.call @free({{.*}}) : (!llvm.ptr) -> ()
+// CHECK: gpu.return
+// CHECK: }
+// CHECK: }
+// CHECK: }
+module attributes {gpu.container_module} {
+
+ gpu.module @kernels {
+ gpu.func @kernel_1() kernel {
+ %memref_a = memref.alloc() : memref<8x16xf32>
+ memref.dealloc %memref_a : memref<8x16xf32>
+ gpu.return
+ }
+ }
+
+ func.func @main() {
+ %memref_a = memref.alloc() : memref<8x16xf32>
+ memref.dealloc %memref_a : memref<8x16xf32>
+ return
+ }
+}
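As the CHECK lines require, the allocation inside `@kernel_1` now resolves its declarations against the nearest SymbolTable ancestor found via `getParentWithTrait<OpTrait::SymbolTable>()`, so `malloc`/`free` are declared in `gpu.module @kernels`, while the host-side allocation in `@main` still declares them in the outer module. With the previous `getParentOfType<ModuleOp>()`, the kernel's declarations would have landed only in the top-level module, outside the GPU module that actually calls them.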
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 39286d4..a1950cb 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -330,6 +330,54 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
return Rc;
}
+/// Returns a buffer of the requested \p Size, to be used as the source for
+/// `submitData`.
+///
+/// For small buffers (`Size <= sizeof(void*)`), uses \p AsyncInfo's
+/// getVoidPtrLocation().
+/// For larger buffers, creates a dynamically allocated buffer that will
+/// eventually be deleted by \p AsyncInfo's post-processing callback.
+static char *getOrCreateSourceBufferForSubmitData(AsyncInfoTy &AsyncInfo,
+ int64_t Size) {
+ constexpr int64_t VoidPtrSize = sizeof(void *);
+
+ if (Size <= VoidPtrSize) {
+ void *&BufferElement = AsyncInfo.getVoidPtrLocation();
+ return reinterpret_cast<char *>(&BufferElement);
+ }
+
+ // Create a dynamic buffer for larger data and schedule its deletion.
+ char *DataBuffer = new char[Size];
+ AsyncInfo.addPostProcessingFunction([DataBuffer]() {
+ delete[] DataBuffer;
+ return OFFLOAD_SUCCESS;
+ });
+ return DataBuffer;
+}
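The size split keeps the common case cheap: an ordinary pointer fits in AsyncInfo's built-in `void *` slot, whose lifetime already spans the asynchronous submission, so only oversized payloads (such as Fortran descriptors) pay for a heap allocation and a deferred delete.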
+
+/// Calculates the target pointee base by applying the host
+/// pointee begin/base delta to the target pointee begin.
+///
+/// ```
+/// TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
+/// ```
+static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
+ void *TgtPteeBegin) {
+ uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) -
+ reinterpret_cast<uint64_t>(HstPteeBase);
+ void *TgtPteeBase = reinterpret_cast<void *>(
+ reinterpret_cast<uint64_t>(TgtPteeBegin) - Delta);
+
+ DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD
+ ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n",
+ DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta);
+ DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD
+ "\n",
+ DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin));
+
+ return TgtPteeBase;
+}
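For example, with illustrative addresses HstPteeBase = 0x1000, HstPteeBegin = 0x1010, and TgtPteeBegin = 0x5010, the delta is 0x10 and the attached pointer is initialized to TgtPteeBase = 0x5000, so base-relative accesses on the device line up with the host layout.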
+
/// Utility function to perform a pointer attachment operation.
///
/// For something like:
@@ -399,16 +447,8 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
constexpr int64_t VoidPtrSize = sizeof(void *);
assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
- uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) -
- reinterpret_cast<uint64_t>(HstPteeBase);
- void *TgtPteeBase = reinterpret_cast<void *>(
- reinterpret_cast<uint64_t>(TgtPteeBegin) - Delta);
- DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD
- ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n",
- DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta);
- DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD
- "\n",
- DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin));
+ void *TgtPteeBase =
+ calculateTargetPointeeBase(HstPteeBase, HstPteeBegin, TgtPteeBegin);
// Add shadow pointer tracking
if (!PtrTPR.getEntry()->addShadowPointer(
@@ -435,48 +475,32 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
return OFFLOAD_SUCCESS;
};
- bool IsPtrAFortranDescriptor = HstPtrSize > VoidPtrSize;
- if (!IsPtrAFortranDescriptor) {
- // For "regular" pointers, we can use the VoidPtrLocation from AsyncInfo as
- // the buffer space for the submission.
- void *&BufferElement = AsyncInfo.getVoidPtrLocation();
- BufferElement = TgtPteeBase;
-
- // Submit the updated pointer value to device
- return HandleSubmitResult(Device.submitData(
- TgtPtrAddr, &BufferElement, VoidPtrSize, AsyncInfo, PtrTPR.getEntry()));
+ // Get a buffer to be used as the source for data submission.
+ char *SrcBuffer = getOrCreateSourceBufferForSubmitData(AsyncInfo, HstPtrSize);
+
+ // The pointee's address should occupy the first VoidPtrSize bytes
+ // irrespective of HstPtrSize.
+ std::memcpy(SrcBuffer, &TgtPteeBase, VoidPtrSize);
+
+ // For larger "pointers" (e.g., Fortran descriptors), copy remaining
+ // descriptor fields from the host descriptor into the buffer.
+ if (HstPtrSize > VoidPtrSize) {
+ uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
+ void *HstDescriptorFieldsAddr =
+ reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize;
+ std::memcpy(SrcBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
+ HstDescriptorFieldsSize);
+
+ DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD
+ ") (pointer + %" PRId64 " additional bytes from host descriptor " DPxMOD
+ ")\n",
+ HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
+ DPxPTR(HstDescriptorFieldsAddr));
}
- // For larger "pointers" (like Fortran's descriptors), we create a dynamic
- // buffer, which will be eventually destroyed by AsyncInfo's post-processing
- // callback.
- char *DataBuffer = new char[HstPtrSize];
-
- // For such descriptors, to the first VoidPtrSize bytes, we store the
- // pointee's device address.
- std::memcpy(DataBuffer, &TgtPteeBase, sizeof(void *));
-
- // And to the remaining bytes, we copy the remaining contents of the host
- // descriptor after the initial VoidPtrSize bytes.
- uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
- void *HstDescriptorFieldsAddr =
- reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize;
- std::memcpy(DataBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
- HstDescriptorFieldsSize);
-
- DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD ") (pointer + %" PRId64
- " additional bytes from host descriptor " DPxMOD ")\n",
- HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
- DPxPTR(HstDescriptorFieldsAddr));
-
- // Submit the entire buffer to device
- int SubmitResult = Device.submitData(TgtPtrAddr, DataBuffer, HstPtrSize,
+ // Submit the populated source buffer to device.
+ int SubmitResult = Device.submitData(TgtPtrAddr, SrcBuffer, HstPtrSize,
AsyncInfo, PtrTPR.getEntry());
-
- AsyncInfo.addPostProcessingFunction([DataBuffer]() -> int {
- delete[] DataBuffer;
- return OFFLOAD_SUCCESS;
- });
return HandleSubmitResult(SubmitResult);
}
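With this refactor, both paths funnel into a single `submitData` call over one source buffer: bytes [0, sizeof(void*)) always hold TgtPteeBase (the attached pointee's device address), and for descriptor-sized pointers the remaining HstPtrSize - sizeof(void*) bytes are the tail of the host descriptor, copied verbatim.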
@@ -525,10 +549,17 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
// ATTACH map-types are supposed to be handled after all mapping for the
// construct is done. Defer their processing.
if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH) {
- AttachInfo->AttachEntries.emplace_back(
- /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin,
- /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
- /*PointeeName=*/HstPtrName);
+ const bool IsCorrespondingPointerInit =
+ (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE);
+ // We don't need to keep track of PRIVATE | ATTACH entries. They
+ // represent corresponding-pointer-initialization, and are handled
+ // similarly to firstprivate (PRIVATE | TO) entries by
+ // PrivateArgumentManager.
+ if (!IsCorrespondingPointerInit)
+ AttachInfo->AttachEntries.emplace_back(
+ /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin,
+ /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
+ /*PointeeName=*/HstPtrName);
DP("Deferring ATTACH map-type processing for argument %d\n", I);
continue;
@@ -1397,13 +1428,24 @@ class PrivateArgumentManagerTy {
uint32_t Padding;
/// Host pointer name
map_var_info_t HstPtrName = nullptr;
+ /// For corresponding-pointer-initialization: host pointee base address.
+ void *HstPteeBase = nullptr;
+ /// For corresponding-pointer-initialization: host pointee begin address.
+ void *HstPteeBegin = nullptr;
+ /// Whether this argument needs corresponding-pointer-initialization.
+ bool IsCorrespondingPointerInit = false;
FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size,
uint32_t Alignment, uint32_t Padding,
- map_var_info_t HstPtrName = nullptr)
+ map_var_info_t HstPtrName = nullptr,
+ void *HstPteeBase = nullptr,
+ void *HstPteeBegin = nullptr,
+ bool IsCorrespondingPointerInit = false)
: HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment),
- Size(Size), Padding(Padding), HstPtrName(HstPtrName) {}
+ Size(Size), Padding(Padding), HstPtrName(HstPtrName),
+ HstPteeBase(HstPteeBase), HstPteeBegin(HstPteeBegin),
+ IsCorrespondingPointerInit(IsCorrespondingPointerInit) {}
};
/// A vector of target pointers for all private arguments
@@ -1421,6 +1463,153 @@ class PrivateArgumentManagerTy {
/// A pointer to a \p AsyncInfoTy object
AsyncInfoTy &AsyncInfo;
+ /// \returns the value of the target pointee's base to be used for
+ /// corresponding-pointer-initialization.
+ void *getTargetPointeeBaseForCorrespondingPointerInitialization(
+ void *HstPteeBase, void *HstPteeBegin) {
+ // See if the pointee's begin address has corresponding storage on device.
+ void *TgtPteeBegin = [&]() -> void * {
+ if (!HstPteeBegin) {
+ DP("Corresponding-pointer-initialization: pointee begin address is "
+ "null\n");
+ return nullptr;
+ }
+
+ return Device.getMappingInfo()
+ .getTgtPtrBegin(HstPteeBegin, /*Size=*/0, /*UpdateRefCount=*/false,
+ /*UseHoldRefCount=*/false)
+ .TargetPointer;
+ }();
+
+ // If it does, we calculate the target pointee base from it and return
+ // that. Otherwise, we retain the host pointee's base as the target pointee
+ // base of the initialized pointer. It's the user's responsibility to
+ // ensure that if the lookup fails, the host pointee is accessible on the
+ // device.
+ return TgtPteeBegin ? calculateTargetPointeeBase(HstPteeBase, HstPteeBegin,
+ TgtPteeBegin)
+ : HstPteeBase;
+ }
+
+ /// Initialize the source buffer for corresponding-pointer-initialization.
+ ///
+ /// It computes the target pointee base address (falling back to the host
+ /// pointee's base address if the lookup of the target pointee fails) and
+ /// stores it into the first `sizeof(void *)` bytes of \p Buffer. For larger
+ /// pointers (e.g., Fortran descriptors), it then copies the remaining
+ /// fields of the host descriptor \p HstPtr into the bytes after those first
+ /// `sizeof(void *)` bytes.
+ ///
+ /// Corresponding-pointer-initialization represents the initialization of the
+ /// private version of a base-pointer/referring-pointer on a target construct.
+ ///
+ /// For example, for the following test:
+ /// ```cpp
+ /// int x[10];
+ /// int *px = &x[0];
+ /// ...
+ /// #pragma omp target data map(tofrom:px)
+ /// {
+ /// int **ppx = omp_get_mapped_ptr(&px, omp_get_default_device());
+ /// #pragma omp target map(tofrom:px[1]) is_device_ptr(ppx)
+ /// {
+ /// foo(px, ppx);
+ /// }
+ /// }
+ /// ```
+ /// The following shows a possible way to implement the mapping of `px`,
+ /// which is predetermined firstprivate and should get initialized
+ /// via corresponding-pointer-initialization:
+ ///
+ /// (A) Possible way to implement the above with PRIVATE | ATTACH:
+ /// ```llvm
+ /// ; maps for px:
+ /// ; &px[0], &px[1], sizeof(px[1]), TO | FROM // (1)
+ /// ; &px, &px[1], sizeof(px), ATTACH // (2)
+ /// ; &px, &px[1], sizeof(px), PRIVATE | ATTACH | PARAM // (3)
+ /// call... @__omp_outlined...(ptr %px, ptr %ppx)
+ /// define ... @__omp_outlined(ptr %px, ptr %ppx) {...
+ /// foo(%px, %ppx)
+ /// ...}
+ /// ```
+ /// `(1)` maps the pointee `px[1]`.
+ /// `(2)` attaches it to the mapped version of `px`. It can be controlled by
+ /// the user based on the `attach(auto/always/never)` map-type modifier.
+ /// `(3)` privatizes and initializes the private pointer `px`, and passes it
+ /// into the kernel as the argument `%px`. Can be skipped if `px` is not
+ /// referenced in the target construct.
+ ///
+ /// While this method is not much more beneficial than simply doing the
+ /// initialization in the body of the kernel, as in:
+ /// (B) Possible way to implement the above without PRIVATE | ATTACH:
+ /// ```llvm
+ /// ; maps for px:
+ /// ; &px[0], &px[1], sizeof(px[1]), TO | FROM | PARAM // (4)
+ /// ; &px, &px[1], sizeof(px), ATTACH // (5)
+ /// call... @__omp_outlined...(ptr %px0, ptr %ppx)
+ /// define ... @__omp_outlined...(ptr %px0, ptr %ppx) {
+ /// %px = alloca ptr;
+ /// store ptr %px0, ptr %px
+ /// foo(%px, %ppx)
+ /// }
+ /// ```
+ ///
+ /// (B) is not so convenient for Fortran descriptors, because in
+ /// addition to the lookup, the remaining fields of the descriptor have
+ /// to be passed into the kernel to initialize the private copy, which
+ /// makes (A) a cleaner option for them, e.g.:
+ /// ```f90
+ /// integer, pointer :: p(:)
+ /// !$omp target map(p(1))
+ /// ```
+ ///
+ /// (C) Possible mapping for the above Fortran test using PRIVATE | ATTACH:
+ /// ```llvm
+ /// ; maps for p:
+ /// ; &p(1), &p(1), sizeof(p(1)), TO | FROM
+ /// ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), ATTACH
+ /// ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), PRIVATE | ATTACH | PARAM
+ /// call... @__omp_outlined...(ptr %ref_ptr_of_p)
+ /// ```
+ void initBufferForCorrespondingPointerInitialization(char *Buffer,
+ void *HstPtr,
+ int64_t HstPtrSize,
+ void *HstPteeBase,
+ void *HstPteeBegin) {
+ constexpr int64_t VoidPtrSize = sizeof(void *);
+ assert(HstPtrSize >= VoidPtrSize &&
+ "corresponding-pointer-initialization: pointer size is too small");
+
+ void *TgtPteeBase =
+ getTargetPointeeBaseForCorrespondingPointerInitialization(HstPteeBase,
+ HstPteeBegin);
+
+ // Store the target pointee base address into the first VoidPtrSize bytes.
+ DP("Initializing corresponding-pointer-initialization source buffer "
+ "for " DPxMOD ", with pointee base " DPxMOD "\n",
+ DPxPTR(HstPtr), DPxPTR(TgtPteeBase));
+ std::memcpy(Buffer, &TgtPteeBase, VoidPtrSize);
+ if (HstPtrSize <= VoidPtrSize)
+ return;
+
+ // For Fortran descriptors, copy the remaining descriptor fields from the host.
+ uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
+ void *HstDescriptorFieldsAddr = static_cast<char *>(HstPtr) + VoidPtrSize;
+ DP("Copying %" PRId64
+ " bytes of descriptor fields into corresponding-pointer-initialization "
+ "buffer at offset %" PRId64 ", from " DPxMOD "\n",
+ HstDescriptorFieldsSize, VoidPtrSize, DPxPTR(HstDescriptorFieldsAddr));
+ std::memcpy(Buffer + VoidPtrSize, HstDescriptorFieldsAddr,
+ HstDescriptorFieldsSize);
+ }
+
+ /// Helper function to create and initialize a buffer to be used as the source
+ /// for corresponding-pointer-initialization.
+ void *createAndInitSourceBufferForCorrespondingPointerInitialization(
+ void *HstPtr, int64_t HstPtrSize, void *HstPteeBase, void *HstPteeBegin) {
+ char *Buffer = getOrCreateSourceBufferForSubmitData(AsyncInfo, HstPtrSize);
+ initBufferForCorrespondingPointerInitialization(Buffer, HstPtr, HstPtrSize,
+ HstPteeBase, HstPteeBegin);
+ return Buffer;
+ }
+
// TODO: What would be the best value here? Should we make it configurable?
// If the size is larger than this threshold, we will allocate and transfer it
// immediately instead of packing it.
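`calculateTargetPointeeBase`, called above, is defined outside this patch. Given how it is invoked, a plausible reading is offset preservation: the private pointer's base keeps the same base-to-begin offset on the device that the host pointee had. A sketch under that assumption (the real function may differ):

```cpp
#include <cstddef>

// Sketch: preserve the host base-to-begin offset when rebasing onto the
// device pointee. The actual calculateTargetPointeeBase lives elsewhere
// in omptarget and is not shown in this diff.
void *calculateTargetPointeeBaseSketch(void *HstPteeBase, void *HstPteeBegin,
                                       void *TgtPteeBegin) {
  // Offset of the begin address within the host pointee,
  // e.g. &px[1] relative to &x[0] in the example above.
  std::ptrdiff_t Delta = static_cast<char *>(HstPteeBegin) -
                         static_cast<char *>(HstPteeBase);
  // Apply the same offset backwards from the device begin address.
  return static_cast<char *>(TgtPteeBegin) - Delta;
}
```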
@@ -1435,7 +1624,9 @@ public:
int addArg(void *HstPtr, int64_t ArgSize, int64_t ArgOffset,
bool IsFirstPrivate, void *&TgtPtr, int TgtArgsIndex,
map_var_info_t HstPtrName = nullptr,
- const bool AllocImmediately = false) {
+ const bool AllocImmediately = false, void *HstPteeBase = nullptr,
+ void *HstPteeBegin = nullptr,
+ bool IsCorrespondingPointerInit = false) {
// If the argument is not first-private, or its size is greater than a
// predefined threshold, we will allocate memory and issue the transfer
// immediately.
@@ -1458,9 +1649,19 @@ public:
// If first-private, copy data from host
if (IsFirstPrivate) {
DP("Submitting firstprivate data to the device.\n");
- int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, AsyncInfo);
+
+ // The data source used for corresponding-pointer-initialization
+ // differs from that of regular firstprivate arguments.
+ void *DataSource =
+ IsCorrespondingPointerInit
+ ? createAndInitSourceBufferForCorrespondingPointerInitialization(
+ HstPtr, ArgSize, HstPteeBase, HstPteeBegin)
+ : HstPtr;
+ int Ret = Device.submitData(TgtPtr, DataSource, ArgSize, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
- DP("Copying data to device failed, failed.\n");
+ DP("Copying %s data to device failed.\n",
+ IsCorrespondingPointerInit ? "corresponding-pointer-initialization"
+ : "firstprivate");
return OFFLOAD_FAIL;
}
}
@@ -1506,8 +1707,10 @@ public:
}
}
- FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
- StartAlignment, Padding, HstPtrName);
+ FirstPrivateArgInfo.emplace_back(
+ TgtArgsIndex, HstPtr, ArgSize, StartAlignment, Padding, HstPtrName,
+ HstPteeBase, HstPteeBegin, IsCorrespondingPointerInit);
+
FirstPrivateArgSize += Padding + ArgSize;
}
@@ -1526,7 +1729,13 @@ public:
for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
// First pad the pointer as we (have to) pad it on the device too.
Itr = std::next(Itr, Info.Padding);
- std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
+
+ if (Info.IsCorrespondingPointerInit)
+ initBufferForCorrespondingPointerInitialization(
+ &*Itr, Info.HstPtrBegin, Info.Size, Info.HstPteeBase,
+ Info.HstPteeBegin);
+ else
+ std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
Itr = std::next(Itr, Info.Size);
}
// Allocate target memory
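The loop above packs all small firstprivate arguments into one contiguous host buffer, inserting each entry's pre-computed padding before copying (or, for corresponding-pointer-initialization, synthesizing) its bytes. A stripped-down sketch of the same pack-then-copy pattern, with hypothetical types:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

struct PackedArg {
  const char *Begin; // host bytes to copy
  uint32_t Size;     // payload size
  uint32_t Padding;  // alignment padding inserted before the payload
};

std::vector<char> packArgs(const std::vector<PackedArg> &Args) {
  std::size_t Total = 0;
  for (const PackedArg &A : Args)
    Total += A.Padding + A.Size;
  std::vector<char> Buffer(Total);
  char *Itr = Buffer.data();
  for (const PackedArg &A : Args) {
    Itr += A.Padding;                  // pad, as the device side will too
    std::memcpy(Itr, A.Begin, A.Size); // copy the payload
    Itr += A.Size;
  }
  return Buffer;
}
```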
@@ -1682,8 +1891,40 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
TgtPtrBegin = HstPtrBase;
TgtBaseOffset = 0;
} else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
+ // For cases like:
+ // ```
+ // int *p = ...;
+ // #pragma omp target map(p[0:10])
+ // ```
+ // `p` is predetermined firstprivate on the target construct, and the
+ // method to determine the initial value of the private copy on the
+ // device is called "corresponding-pointer-initialization".
+ //
+ // Such firstprivate pointers that need
+ // corresponding-pointer-initialization are represented using the
+ // `PRIVATE | ATTACH` map-types, in contrast to regular firstprivate
+ // entries, which use `PRIVATE | TO`. The structure of these
+ // `PRIVATE | ATTACH` entries is the same as the non-private
+ // `ATTACH` entries used to represent pointer-attachments, i.e.:
+ // ```
+ // &hst_ptr_base/begin, &hst_ptee_begin, sizeof(hst_ptr)
+ // ```
+ const bool IsAttach = (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH);
+ void *HstPteeBase = nullptr;
+ void *HstPteeBegin = nullptr;
+ if (IsAttach) {
+ // For corresponding-pointer-initialization, Args[I] is HstPteeBegin,
+ // and ArgBases[I] serves as both HstPtrBase and HstPtrBegin.
+ HstPteeBase = *reinterpret_cast<void **>(HstPtrBase);
+ HstPteeBegin = Args[I];
+ HstPtrBegin = ArgBases[I];
+ }
TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
- const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);
+ // Corresponding-pointer-initialization is a special case of firstprivate,
+ // since it also involves initializing the private pointer.
+ const bool IsFirstPrivate =
+ (ArgTypes[I] & OMP_TGT_MAPTYPE_TO) || IsAttach;
+
// If there is a next argument and it depends on the current one, we need
// to allocate the private memory immediately. If this is not the case,
// then the argument can be marked for optimization and packed with the
@@ -1692,9 +1933,11 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
(I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
Ret = PrivateArgumentManager.addArg(
HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
- TgtArgs.size(), HstPtrName, AllocImmediately);
+ /*TgtArgsIndex=*/TgtArgs.size(), HstPtrName, AllocImmediately,
+ HstPteeBase, HstPteeBegin, /*IsCorrespondingPointerInit=*/IsAttach);
if (Ret != OFFLOAD_SUCCESS) {
- REPORT("Failed to process %sprivate argument " DPxMOD "\n",
+ REPORT("Failed to process %s%sprivate argument " DPxMOD "\n",
+ IsAttach ? "corresponding-pointer-initialization " : "",
(IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
return OFFLOAD_FAIL;
}
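Given the entry layout described in the comment above (`&hst_ptr_base/begin, &hst_ptee_begin, sizeof(hst_ptr)`), the pointee addresses fall out of a `PRIVATE | ATTACH` entry as sketched below (a self-contained illustration mirroring the unpacking in `processDataBefore`; the `AttachEntryView` type is hypothetical):

```cpp
#include <cstdint>

// Hypothetical view of one PRIVATE | ATTACH map entry.
struct AttachEntryView {
  void *ArgBase; // &hst_ptr: serves as both HstPtrBase and HstPtrBegin
  void *Arg;     // &hst_ptee_begin
  int64_t Size;  // sizeof(hst_ptr): sizeof(void *) or a descriptor size
};

// Unpack the entry the same way processDataBefore does for
// corresponding-pointer-initialization.
void decodeAttachEntry(const AttachEntryView &E, void *&HstPtrBegin,
                       void *&HstPteeBase, void *&HstPteeBegin) {
  HstPtrBegin = E.ArgBase;
  // The first sizeof(void *) bytes of the host pointer hold the pointee base.
  HstPteeBase = *static_cast<void **>(E.ArgBase);
  HstPteeBegin = E.Arg;
}
```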
diff --git a/offload/test/mapping/lambda_by_value.cpp b/offload/test/mapping/lambda_by_value.cpp
index 5516dedd..4c0278d 100644
--- a/offload/test/mapping/lambda_by_value.cpp
+++ b/offload/test/mapping/lambda_by_value.cpp
@@ -1,4 +1,5 @@
-// RUN: %libomptarget-compilexx-run-and-check-generic
+// RUN: %libomptarget-compileopt-generic -fno-exceptions
+// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
#include <stdint.h>
#include <stdio.h>
diff --git a/offload/test/mapping/map_back_race.cpp b/offload/test/mapping/map_back_race.cpp
index 8a988d3..49bbe87 100644
--- a/offload/test/mapping/map_back_race.cpp
+++ b/offload/test/mapping/map_back_race.cpp
@@ -2,6 +2,9 @@
// Taken from https://github.com/llvm/llvm-project/issues/54216
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: gpu
+
#include <algorithm>
#include <cstdlib>
#include <iostream>
diff --git a/offload/test/mapping/map_both_pointer_pointee.c b/offload/test/mapping/map_both_pointer_pointee.c
index 7be1ba4..1934b70 100644
--- a/offload/test/mapping/map_both_pointer_pointee.c
+++ b/offload/test/mapping/map_both_pointer_pointee.c
@@ -1,11 +1,10 @@
-// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
-// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
-// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
-// RUN: %libomptarget-compile-run-and-check-x86_64-unknown-linux-gnu
-// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: unified_shared_memory
// UNSUPPORTED: amdgcn-amd-amdhsa
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// XFAIL: nvidiagpu
#pragma omp declare target
int *ptr1;
diff --git a/offload/test/mapping/map_ptr_and_star_local.c b/offload/test/mapping/map_ptr_and_star_local.c
index cc826b3..97fa7cd 100644
--- a/offload/test/mapping/map_ptr_and_star_local.c
+++ b/offload/test/mapping/map_ptr_and_star_local.c
@@ -1,6 +1,9 @@
-// RUN: %libomptarget-compilexx-run-and-check-generic
+// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: libc
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// XFAIL: gpu
#include <omp.h>
#include <stdio.h>
diff --git a/offload/test/mapping/map_structptr_and_member_global.c b/offload/test/mapping/map_structptr_and_member_global.c
index 960eea4..f855e87 100644
--- a/offload/test/mapping/map_structptr_and_member_global.c
+++ b/offload/test/mapping/map_structptr_and_member_global.c
@@ -1,6 +1,9 @@
-// RUN: %libomptarget-compilexx-run-and-check-generic
+// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: libc
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// XFAIL: gpu
#include <omp.h>
#include <stdio.h>
diff --git a/offload/test/mapping/map_structptr_and_member_local.c b/offload/test/mapping/map_structptr_and_member_local.c
index bd75940..bd9e2a8 100644
--- a/offload/test/mapping/map_structptr_and_member_local.c
+++ b/offload/test/mapping/map_structptr_and_member_local.c
@@ -1,6 +1,9 @@
-// RUN: %libomptarget-compilexx-run-and-check-generic
+// RUN: %libomptarget-compile-run-and-check-generic
// REQUIRES: libc
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// XFAIL: gpu
#include <omp.h>
#include <stdio.h>
diff --git a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
index 1f84a0e..b2e1edf 100644
--- a/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
+++ b/offload/test/offloading/CUDA/basic_launch_multi_arg.cu
@@ -5,10 +5,10 @@
// RUN: %t | %fcheck-generic
// clang-format on
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: x86_64-unknown-linux-gnu
-// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO
+// REQUIRES: gpu
+//
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// XFAIL: gpu
#include <stdio.h>
diff --git a/offload/test/offloading/bug51781.c b/offload/test/offloading/bug51781.c
index 2f30b03..ff7fa51 100644
--- a/offload/test/offloading/bug51781.c
+++ b/offload/test/offloading/bug51781.c
@@ -16,6 +16,7 @@
// the generic state machine.
//
// RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt \
+// RUN: -Xoffload-linker -mllvm=-openmp-opt-disable-spmdization \
// RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
@@ -24,7 +25,9 @@
// Repeat with reduction clause, which has managed to break the custom state
// machine in the past.
//
-// RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt -DADD_REDUCTION \
+// RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt \
+// RUN: -DADD_REDUCTION \
+// RUN: -Xoffload-linker -mllvm=-openmp-opt-disable-spmdization \
// RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1
// RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom
// RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom
diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90
index b9c2d34..b44c0b2 100644
--- a/offload/test/offloading/fortran/declare-target-automap.f90
+++ b/offload/test/offloading/fortran/declare-target-automap.f90
@@ -1,6 +1,9 @@
!Offloading test for AUTOMAP modifier in declare target enter
! REQUIRES: flang, amdgpu
+! FIXME: https://github.com/llvm/llvm-project/issues/161265
+! XFAIL: amdgpu
+
! RUN: %libomptarget-compile-fortran-run-and-check-generic
program automap_program
use iso_c_binding, only: c_loc
diff --git a/offload/test/offloading/interop.c b/offload/test/offloading/interop.c
index 26287e3..d9fa2ef 100644
--- a/offload/test/offloading/interop.c
+++ b/offload/test/offloading/interop.c
@@ -1,5 +1,6 @@
// RUN: %libomptarget-compile-run-and-check-generic
-// REQUIRES: nvptx64-nvidia-cuda
+
+// XFAIL: *
#include <assert.h>
#include <omp.h>
diff --git a/offload/test/offloading/single_threaded_for_barrier_hang_1.c b/offload/test/offloading/single_threaded_for_barrier_hang_1.c
index 8ee6b51..a007521 100644
--- a/offload/test/offloading/single_threaded_for_barrier_hang_1.c
+++ b/offload/test/offloading/single_threaded_for_barrier_hang_1.c
@@ -1,6 +1,9 @@
// RUN: %libomptarget-compile-run-and-check-generic
// RUN: %libomptarget-compileopt-run-and-check-generic
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: gpu
+
#include <omp.h>
#include <stdio.h>
diff --git a/offload/test/offloading/single_threaded_for_barrier_hang_2.c b/offload/test/offloading/single_threaded_for_barrier_hang_2.c
index a98abd6..cabd2ed 100644
--- a/offload/test/offloading/single_threaded_for_barrier_hang_2.c
+++ b/offload/test/offloading/single_threaded_for_barrier_hang_2.c
@@ -1,6 +1,7 @@
// RUN: %libomptarget-compile-run-and-check-generic
-// FIXME: This fails with optimization enabled and prints b: 0
-// FIXME: RUN: %libomptarget-compileopt-run-and-check-generic
+
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: gpu
#include <omp.h>
#include <stdio.h>
diff --git a/offload/test/offloading/spmdization.c b/offload/test/offloading/spmdization.c
index 7f3f47d..48627cd 100644
--- a/offload/test/offloading/spmdization.c
+++ b/offload/test/offloading/spmdization.c
@@ -2,7 +2,8 @@
// RUN: %libomptarget-compileopt-generic
// RUN: env LIBOMPTARGET_INFO=16 \
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,SPMD
-// RUN: %libomptarget-compileopt-generic -mllvm --openmp-opt-disable-spmdization
+// RUN: %libomptarget-compileopt-generic -mllvm --openmp-opt-disable-spmdization \
+// RUN: -Xoffload-linker -mllvm=--openmp-opt-disable-spmdization
// RUN: env LIBOMPTARGET_INFO=16 \
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,GENERIC
// clang-format on
diff --git a/offload/test/sanitizer/ptr_outside_alloc_1.c b/offload/test/sanitizer/ptr_outside_alloc_1.c
index bdd0283..b30ce12e 100644
--- a/offload/test/sanitizer/ptr_outside_alloc_1.c
+++ b/offload/test/sanitizer/ptr_outside_alloc_1.c
@@ -5,12 +5,10 @@
// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE
// clang-format on
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: x86_64-unknown-linux-gnu
-// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO
-// UNSUPPORTED: s390x-ibm-linux-gnu
-// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: nvidiagpu
+//
+// REQUIRES: gpu
#include <omp.h>
diff --git a/offload/test/sanitizer/ptr_outside_alloc_2.c b/offload/test/sanitizer/ptr_outside_alloc_2.c
index 6a67962..3bb8bda 100644
--- a/offload/test/sanitizer/ptr_outside_alloc_2.c
+++ b/offload/test/sanitizer/ptr_outside_alloc_2.c
@@ -3,12 +3,10 @@
// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
// clang-format on
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: x86_64-unknown-linux-gnu
-// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO
-// UNSUPPORTED: s390x-ibm-linux-gnu
-// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: nvidiagpu
+//
+// REQUIRES: gpu
#include <omp.h>
diff --git a/offload/test/sanitizer/use_after_free_1.c b/offload/test/sanitizer/use_after_free_1.c
index c4783c5c..acc1de3 100644
--- a/offload/test/sanitizer/use_after_free_1.c
+++ b/offload/test/sanitizer/use_after_free_1.c
@@ -5,12 +5,10 @@
// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE
// clang-format on
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: x86_64-unknown-linux-gnu
-// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO
-// UNSUPPORTED: s390x-ibm-linux-gnu
-// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: nvidiagpu
+//
+// REQUIRES: gpu
#include <omp.h>
diff --git a/offload/test/sanitizer/use_after_free_2.c b/offload/test/sanitizer/use_after_free_2.c
index 1c1e097..3d70fb7 100644
--- a/offload/test/sanitizer/use_after_free_2.c
+++ b/offload/test/sanitizer/use_after_free_2.c
@@ -3,12 +3,10 @@
// RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_ALLOCATION_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK
// clang-format on
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: x86_64-unknown-linux-gnu
-// UNSUPPORTED: x86_64-unknown-linux-gnu-LTO
-// UNSUPPORTED: s390x-ibm-linux-gnu
-// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+// FIXME: https://github.com/llvm/llvm-project/issues/161265
+// UNSUPPORTED: nvidiagpu
+//
+// REQUIRES: gpu
// If offload memory pooling is enabled for a large allocation, reuse error is
// not detected. UNSUPPORTED: large_allocation_memory_pool
diff --git a/offload/tools/deviceinfo/llvm-offload-device-info.cpp b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
index 67a6e07..9b58d67 100644
--- a/offload/tools/deviceinfo/llvm-offload-device-info.cpp
+++ b/offload/tools/deviceinfo/llvm-offload-device-info.cpp
@@ -137,7 +137,7 @@ ol_result_t printDeviceValue(std::ostream &S, ol_device_handle_t Dev,
size_t Size;
OFFLOAD_ERR(olGetDeviceInfoSize(Dev, Info, &Size));
Val.resize(Size);
- OFFLOAD_ERR(olGetDeviceInfo(Dev, Info, sizeof(Val), Val.data()));
+ OFFLOAD_ERR(olGetDeviceInfo(Dev, Info, Size, Val.data()));
doWrite<T, PK>(S, reinterpret_cast<T>(Val.data()));
} else {
T Val;
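The one-line fix in llvm-offload-device-info.cpp addresses a classic pitfall: `sizeof(Val)` on a container yields the size of the container object itself, not of its payload. A distilled illustration of the bug class, using a hypothetical buffer-filling function in place of `olGetDeviceInfo`:

```cpp
#include <cstddef>
#include <cstring>
#include <vector>

// Hypothetical stand-in for olGetDeviceInfo: fills Dst with Size bytes.
void fillInfo(void *Dst, std::size_t Size) { std::memset(Dst, 'x', Size); }

int main() {
  std::vector<char> Val;
  std::size_t Size = 128;
  Val.resize(Size);
  // Wrong: sizeof(Val) is the size of the vector object itself (typically
  // three pointers), not the 128-byte payload just reserved.
  // fillInfo(Val.data(), sizeof(Val));

  // Right: pass the queried size, as the patch above does.
  fillInfo(Val.data(), Size);
}
```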