Diffstat (limited to 'llvm')
 llvm/docs/AMDGPUUsage.rst | 16
 llvm/docs/GettingStartedVS.rst | 13
 llvm/include/llvm/ADT/StringSwitch.h | 9
 llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def | 1
 llvm/include/llvm/CAS/CASID.h | 3
 llvm/include/llvm/CodeGen/LiveIntervals.h | 4
 llvm/include/llvm/Object/ELFTypes.h | 19
 llvm/include/llvm/ObjectYAML/ELFYAML.h | 1
 llvm/lib/Analysis/IVDescriptors.cpp | 13
 llvm/lib/Analysis/InstructionSimplify.cpp | 2
 llvm/lib/Analysis/MemorySSA.cpp | 2
 llvm/lib/Analysis/ScalarEvolution.cpp | 15
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 2
 llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp | 39
 llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 2
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15
 llvm/lib/IR/ConstantFold.cpp | 3
 llvm/lib/ObjCopy/ConfigManager.cpp | 16
 llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp | 42
 llvm/lib/Object/ELF.cpp | 10
 llvm/lib/ObjectYAML/ELFEmitter.cpp | 8
 llvm/lib/ObjectYAML/ELFYAML.cpp | 1
 llvm/lib/Support/TextEncoding.cpp | 6
 llvm/lib/Support/UnicodeNameToCodepoint.cpp | 2
 llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 2
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 1
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 1
 llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp | 6
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 32
 llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp | 2
 llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 9
 llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp | 32
 llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 2
 llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp | 8
 llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp | 4
 llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 2
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 4
 llvm/lib/Target/SystemZ/SystemZ.h | 1
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 420
 llvm/lib/Target/SystemZ/SystemZISelLowering.h | 12
 llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 2
 llvm/lib/Target/X86/X86.td | 3
 llvm/lib/Target/X86/X86ISelLowering.cpp | 18
 llvm/lib/TargetParser/X86TargetParser.cpp | 2
 llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 12
 llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp | 8
 llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp | 4
 llvm/lib/Transforms/Scalar/SROA.cpp | 18
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 25
 llvm/lib/Transforms/Vectorize/VPlan.h | 13
 llvm/test/Analysis/ScalarEvolution/ptrtoint.ll | 20
 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir | 278
 llvm/test/CodeGen/AArch64/combine-sdiv.ll | 85
 llvm/test/CodeGen/AArch64/sme-za-exceptions.ll | 242
 llvm/test/CodeGen/SystemZ/htm-intrinsics.ll | 4
 llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll | 738
 llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll | 1665
 llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll | 109
 llvm/test/CodeGen/X86/isel-fpclass.ll | 433
 llvm/test/CodeGen/X86/masked_gather_scatter.ll | 27
 llvm/test/CodeGen/X86/pr160612.ll | 74
 llvm/test/CodeGen/X86/setcc-wide-types.ll | 65
 llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll | 52
 llvm/test/MC/AArch64/data-directive-specifier.s | 3
 llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll | 2
 llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll | 4
 llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll | 60
 llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll | 2
 llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll | 2
 llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll | 2
 llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll | 2
 llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll | 15
 llvm/test/Transforms/InstSimplify/ptrmask.ll | 20
 llvm/test/Transforms/LoopUnroll/scevunroll.ll | 3
 llvm/test/Transforms/SROA/phi-and-select.ll | 22
 llvm/test/Transforms/SROA/phi-gep.ll | 29
 llvm/test/Transforms/SROA/select-gep.ll | 19
 llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml | 27
 llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml | 278
 llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test | 7
 llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml | 86
 llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml | 27
 llvm/tools/llvm-readobj/ELFDumper.cpp | 2
 llvm/tools/obj2yaml/elf2yaml.cpp | 7
 llvm/unittests/ADT/TypeTraitsTest.cpp | 4
 llvm/unittests/IR/ConstantsTest.cpp | 14
 llvm/unittests/Object/ELFObjectFileTest.cpp | 178
 llvm/unittests/Object/ELFTypesTest.cpp | 35
 llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp | 108
 llvm/utils/profcheck-xfail.txt | 5
94 files changed, 5118 insertions, 534 deletions
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index a4d110f..402fd05 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4172,7 +4172,7 @@ non-AMD key names should be prefixed by "*vendor-name*.".
"Image", or "Pipe". This may be
more restrictive than indicated
by "AccQual" to reflect what the
- kernel actual does. If not
+ kernel actually does. If not
present then the runtime must
assume what is implied by
"AccQual" and "IsConst". Values
@@ -5436,8 +5436,8 @@ The fields used by CP for code objects before V3 also match those specified in
``COMPUTE_PGM_RSRC1.PRIORITY``.
13:12 2 bits FLOAT_ROUND_MODE_32 Wavefront starts execution
with specified rounding
- mode for single (32
- bit) floating point
+ mode for single (32-bit)
+ floating point
precision floating point
operations.
@@ -5769,7 +5769,7 @@ The fields used by CP for code objects before V3 also match those specified in
Wavefront starts execution
with memory violation
- exceptions exceptions
+ exceptions
enabled which are generated
when a memory violation has
occurred for this wavefront from
@@ -6005,7 +6005,7 @@ The fields used by CP for code objects before V3 also match those specified in
FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush
====================================== ===== ====================================
- Denormal flushing is sign respecting. i.e. the behavior expected by
+ Denormal flushing is sign respecting, i.e., the behavior expected by
``"denormal-fp-math"="preserve-sign"``. The behavior is undefined with
``"denormal-fp-math"="positive-zero"``
@@ -16831,7 +16831,7 @@ For GFX125x:
* Some memory operations contain a ``nv`` bit, for "non-volatile", which indicates
memory that is not expected to change during a kernel's execution.
This information is propagated to the cache lines for that address
- (refered to as ``$nv``).
+ (referred to as ``$nv``).
* When ``nv=0`` reads hit dirty ``$nv=1`` data in cache, the hardware will
writeback the data to the next level in the hierarchy and then subsequently read
@@ -18970,7 +18970,7 @@ On entry to a function:
#. All other registers are unspecified.
#. Any necessary ``s_waitcnt`` has been performed to ensure memory is available
to the function.
-#. Use pass-by-reference (byref) in stead of pass-by-value (byval) for struct
+#. Use pass-by-reference (byref) instead of pass-by-value (byval) for struct
arguments in C ABI. Callee is responsible for allocating stack memory and
copying the value of the struct if modified. Note that the backend still
supports byval for struct arguments.
@@ -20214,7 +20214,7 @@ from the value of the ``-mcpu`` option that is passed to the assembler.
.amdgpu_hsa_kernel (name)
+++++++++++++++++++++++++
-This directives specifies that the symbol with given name is a kernel entry
+This directive specifies that the symbol with given name is a kernel entry
point (label) and the object should contain corresponding symbol of type
STT_AMDGPU_HSA_KERNEL.
diff --git a/llvm/docs/GettingStartedVS.rst b/llvm/docs/GettingStartedVS.rst
index bc5746d..e65fd8f 100644
--- a/llvm/docs/GettingStartedVS.rst
+++ b/llvm/docs/GettingStartedVS.rst
@@ -126,6 +126,15 @@ These instructions were tested with Visual Studio 2019 and Python 3.9.6:
cmake -S llvm\llvm -B build -DLLVM_ENABLE_PROJECTS=clang -DLLVM_TARGETS_TO_BUILD=X86 -Thost=x64
exit
+ .. note::
+ By default, the Visual Studio project files generated by CMake use the
+ 32-bit toolset. If you are developing on a 64-bit version of Windows and
+ want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when
+ generating the Visual Studio solution. This requires CMake 3.8.0 or later.
+
+ For Windows on Arm the equivalent is ``-Thost=ARM64``, but this is the
+ default for those hosts, so you do not have to use this option.
+
``LLVM_ENABLE_PROJECTS`` specifies any additional LLVM projects you want to
build while ``LLVM_TARGETS_TO_BUILD`` selects the compiler targets. If
``LLVM_TARGETS_TO_BUILD`` is omitted, by default all targets are built
@@ -149,10 +158,6 @@ These instructions were tested with Visual Studio 2019 and Python 3.9.6:
* CMake generates project files for all build types. To select a specific
build type, use the Configuration manager from the VS IDE or the
``/property:Configuration`` command-line option when using MSBuild.
- * By default, the Visual Studio project files generated by CMake use the
- 32-bit toolset. If you are developing on a 64-bit version of Windows and
- want to use the 64-bit toolset, pass the ``-Thost=x64`` flag when
- generating the Visual Studio solution. This requires CMake 3.8.0 or later.
13. Start Visual Studio and select configuration:
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h
index a96535c..1bb07e39 100644
--- a/llvm/include/llvm/ADT/StringSwitch.h
+++ b/llvm/include/llvm/ADT/StringSwitch.h
@@ -54,17 +54,14 @@ public:
explicit StringSwitch(StringRef S)
: Str(S), Result() { }
+ StringSwitch(StringSwitch &&) = default;
+
// StringSwitch is not copyable.
StringSwitch(const StringSwitch &) = delete;
// StringSwitch is not assignable due to 'Str' being 'const'.
void operator=(const StringSwitch &) = delete;
- void operator=(StringSwitch &&other) = delete;
-
- StringSwitch(StringSwitch &&other)
- : Str(other.Str), Result(std::move(other.Result)) { }
-
- ~StringSwitch() = default;
+ void operator=(StringSwitch &&) = delete;
// Case-sensitive case matchers
StringSwitch &Case(StringLiteral S, T Value) {
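A note on the StringSwitch change above: the move constructor is now defaulted instead of hand-written, while copying and both assignment forms stay deleted. A minimal sketch of why movability matters (the helper and names are illustrative, not from this commit):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    // "return SS" selects the defaulted move constructor, since
    // StringSwitch is not copyable; a helper can therefore build part
    // of the switch and hand it back by value.
    static llvm::StringSwitch<int> makeBaseSwitch(llvm::StringRef Name) {
      llvm::StringSwitch<int> SS(Name);
      SS.Case("none", 0).Case("all", 1);
      return SS;
    }

    int classify(llvm::StringRef Name) {
      return makeBaseSwitch(Name).Case("some", 2).Default(-1);
    }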
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
index 8dcc292..1cfcdbf 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
@@ -62,6 +62,7 @@ ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139)
ELF_RELOC(R_AARCH64_PLT32, 0x13a)
ELF_RELOC(R_AARCH64_GOTPCREL32, 0x13b)
ELF_RELOC(R_AARCH64_PATCHINST, 0x13c)
+ELF_RELOC(R_AARCH64_FUNCINIT64, 0x13d)
// General dynamic TLS relocations
ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200)
ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201)
diff --git a/llvm/include/llvm/CAS/CASID.h b/llvm/include/llvm/CAS/CASID.h
index 8820994..f508ed3 100644
--- a/llvm/include/llvm/CAS/CASID.h
+++ b/llvm/include/llvm/CAS/CASID.h
@@ -95,8 +95,7 @@ public:
}
friend hash_code hash_value(const CASID &ID) {
- ArrayRef<uint8_t> Hash = ID.getHash();
- return hash_combine_range(Hash.begin(), Hash.end());
+ return hash_combine_range(ID.getHash());
}
const CASContext &getContext() const {
diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h
index 1050b3d..c252f9d 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -229,8 +229,8 @@ public:
/// doing something wrong if you call pruneValue directly on a
/// LiveInterval. Indeed, you are supposed to call pruneValue on the main
/// LiveRange and all the LiveRanges of the subranges if any.
- LLVM_ATTRIBUTE_UNUSED void pruneValue(LiveInterval &, SlotIndex,
- SmallVectorImpl<SlotIndex> *) {
+ [[maybe_unused]] void pruneValue(LiveInterval &, SlotIndex,
+ SmallVectorImpl<SlotIndex> *) {
llvm_unreachable(
"Use pruneValue on the main LiveRange and on each subrange");
}
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 5a26e2f..e9a417d 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -833,6 +833,7 @@ struct BBAddrMap {
bool MultiBBRange : 1;
bool OmitBBEntries : 1;
bool CallsiteEndOffsets : 1;
+ bool BBHash : 1;
bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; }
@@ -845,7 +846,8 @@ struct BBAddrMap {
(static_cast<uint8_t>(BrProb) << 2) |
(static_cast<uint8_t>(MultiBBRange) << 3) |
(static_cast<uint8_t>(OmitBBEntries) << 4) |
- (static_cast<uint8_t>(CallsiteEndOffsets) << 5);
+ (static_cast<uint8_t>(CallsiteEndOffsets) << 5) |
+ (static_cast<uint8_t>(BBHash) << 6);
}
// Decodes from minimum bit width representation and validates no
@@ -854,7 +856,8 @@ struct BBAddrMap {
Features Feat{
static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)),
static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)),
- static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5))};
+ static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5)),
+ static_cast<bool>(Val & (1 << 6))};
if (Feat.encode() != Val)
return createStringError(
std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x",
@@ -864,10 +867,10 @@ struct BBAddrMap {
bool operator==(const Features &Other) const {
return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange,
- OmitBBEntries, CallsiteEndOffsets) ==
+ OmitBBEntries, CallsiteEndOffsets, BBHash) ==
std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb,
Other.MultiBBRange, Other.OmitBBEntries,
- Other.CallsiteEndOffsets);
+ Other.CallsiteEndOffsets, Other.BBHash);
}
};
@@ -920,17 +923,19 @@ struct BBAddrMap {
false}; // Metadata for this basic block.
// Offsets of end of call instructions, relative to the basic block start.
SmallVector<uint32_t, 1> CallsiteEndOffsets;
+ uint64_t Hash = 0; // Hash for this basic block.
BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD,
- SmallVector<uint32_t, 1> CallsiteEndOffsets)
+ SmallVector<uint32_t, 1> CallsiteEndOffsets, uint64_t Hash)
: ID(ID), Offset(Offset), Size(Size), MD(MD),
- CallsiteEndOffsets(std::move(CallsiteEndOffsets)) {}
+ CallsiteEndOffsets(std::move(CallsiteEndOffsets)), Hash(Hash) {}
UniqueBBID getID() const { return {ID, 0}; }
bool operator==(const BBEntry &Other) const {
return ID == Other.ID && Offset == Other.Offset && Size == Other.Size &&
- MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets;
+ MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets &&
+ Hash == Other.Hash;
}
bool hasReturn() const { return MD.HasReturn; }
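A quick round-trip sketch of the widened feature byte (the initializer order follows the bit-field declarations above; the chosen values are illustrative):

    #include "llvm/Object/ELFTypes.h"
    #include <cassert>
    #include <cstdint>

    static void featureRoundTrip() {
      using llvm::object::BBAddrMap;
      // FuncEntryCount (bit 0), CallsiteEndOffsets (bit 5) and the new
      // BBHash (bit 6) set; everything else clear.
      BBAddrMap::Features F{true, false, false, false, false, true, true};
      uint8_t Enc = F.encode();                    // 0x61 == 0b0110'0001
      auto Dec = BBAddrMap::Features::decode(Enc); // Expected<Features>
      assert(Dec && *Dec == F);                    // round-trips exactly
    }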
diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h
index c90591d..a7c7c7c 100644
--- a/llvm/include/llvm/ObjectYAML/ELFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h
@@ -163,6 +163,7 @@ struct BBAddrMapEntry {
llvm::yaml::Hex64 Size;
llvm::yaml::Hex64 Metadata;
std::optional<std::vector<llvm::yaml::Hex64>> CallsiteEndOffsets;
+ std::optional<llvm::yaml::Hex64> Hash;
};
uint8_t Version;
llvm::yaml::Hex8 Feature;
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b8c540c..9f8ac6e 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -849,17 +849,12 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
/// %sum.2 = select %cmp, %add, %sum.1
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
- SelectInst *SI = dyn_cast<SelectInst>(I);
- if (!SI)
- return InstDesc(false, I);
-
- CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
+ Value *TrueVal, *FalseVal;
// Only handle single use cases for now.
- if (!CI || !CI->hasOneUse())
+ if (!match(I,
+ m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
return InstDesc(false, I);
- Value *TrueVal = SI->getTrueValue();
- Value *FalseVal = SI->getFalseValue();
// Handle only when either of operands of select instruction is a PHI
// node for now.
if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
@@ -886,7 +881,7 @@ RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
if (!IPhi || IPhi != FalseVal)
return InstDesc(false, I);
- return InstDesc(true, SI);
+ return InstDesc(true, I);
}
RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 4e38626..e08ef60 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6644,7 +6644,7 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
"Invalid mask width");
// If index-width (mask size) is less than pointer-size then mask is
// 1-extended.
- if (match(Op1, m_PtrToInt(m_Specific(Op0))))
+ if (match(Op1, m_PtrToIntOrAddr(m_Specific(Op0))))
return Op0;
// NOTE: We may have attributes associated with the return value of the
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index ab37338..0b2e3fc 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -393,7 +393,7 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
/// \param AA The AliasAnalysis we used for our search.
/// \param AllowImpreciseClobber Always false, unless we do relaxed verify.
-LLVM_ATTRIBUTE_UNUSED static void
+[[maybe_unused]] static void
checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt,
const MemoryLocation &StartLoc, const MemorySSA &MSSA,
const UpwardsMemoryQuery &Query, BatchAAResults &AA,
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3fab6b0..4bafd3f 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15761,6 +15761,21 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
const SCEV *OneAlignedUp =
GetNextSCEVDividesByDivisor(One, DividesBy);
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
+ } else {
+ if (LHS->getType()->isPointerTy()) {
+ LHS = SE.getLosslessPtrToIntExpr(LHS);
+ RHS = SE.getLosslessPtrToIntExpr(RHS);
+ if (isa<SCEVCouldNotCompute>(LHS) || isa<SCEVCouldNotCompute>(RHS))
+ break;
+ }
+ auto AddSubRewrite = [&](const SCEV *A, const SCEV *B) {
+ const SCEV *Sub = SE.getMinusSCEV(A, B);
+ AddRewrite(Sub, Sub,
+ SE.getUMaxExpr(Sub, SE.getOne(From->getType())));
+ };
+ AddSubRewrite(LHS, RHS);
+ AddSubRewrite(RHS, LHS);
+ continue;
}
break;
default:
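Reading of the new else branch (this appears to be the not-equal guard path, judging by the surrounding case; the example is illustrative): a dominating guard A != B means the unsigned difference in each direction is at least 1, so both differences can be strengthened:

    guard: A != B
      =>  rewrite (A - B)  to  umax(A - B, 1)
      =>  rewrite (B - A)  to  umax(B - A, 1)

Pointer operands are first converted with getLosslessPtrToIntExpr and the rewrite is skipped when that fails; this helps, for example, to prove that a `p != end` loop executes at least once.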
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 219bbc9..05fffe9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1437,7 +1437,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
BrProbEnabled,
MF.hasBBSections() && NumMBBSectionRanges > 1,
static_cast<bool>(BBAddrMapSkipEmitBBEntries),
- HasCalls};
+ HasCalls,
+ false};
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 4931403..53f1cfe2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -770,7 +770,7 @@ struct PartwordMaskValues {
Value *Inv_Mask = nullptr;
};
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
auto PrintObj = [&O](auto *V) {
if (V)
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index 3812823..04d9309 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -112,7 +112,7 @@ APInt GISelValueTracking::getKnownOnes(Register R) {
return getKnownBits(R).One;
}
-LLVM_ATTRIBUTE_UNUSED static void
+[[maybe_unused]] static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
<< "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
@@ -2013,6 +2013,43 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
break;
}
+ case TargetOpcode::G_ADD: {
+ Register Src2 = MI.getOperand(2).getReg();
+ unsigned Src2NumSignBits =
+ computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ if (Src2NumSignBits <= 2)
+ return 1; // Early out.
+
+ Register Src1 = MI.getOperand(1).getReg();
+ unsigned Src1NumSignBits =
+ computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if (Src1NumSignBits == 1)
+ return 1; // Early Out.
+
+ // Special case decrementing a value (ADD X, -1):
+ KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth);
+ if (Known2.isAllOnes()) {
+ KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth);
+ // If the input is known to be 0 or 1, the output is 0/-1, which is all
+ // sign bits set.
+ if ((Known1.Zero | 1).isAllOnes())
+ return TyBits;
+
+ // If we are subtracting one from a positive number, there is no carry
+ // out of the result.
+ if (Known1.isNonNegative()) {
+ FirstAnswer = Src1NumSignBits;
+ break;
+ }
+
+ // Otherwise, we treat this like an ADD.
+ }
+
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
+ FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
+ break;
+ }
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
bool IsFP = Opcode == TargetOpcode::G_FCMP;
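A quick numeric check of the decrement special case above (i8, values illustrative):

    X   in [0, 31]   ->  bit patterns 000xxxxx: non-negative, >= 3 sign bits
    X-1 in [-1, 30]  ->  -1 = 11111111 (8 sign bits), 30 = 00011110 (3)

so when Known1.isNonNegative() holds, no carry escapes and the result keeps all Src1NumSignBits = 3 sign bits; without that knowledge the generic min(Src1, Src2) - 1 bound at the end of the case applies.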
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 3268c26..9662511 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1551,7 +1551,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const {
dbgs() << *this << '\n';
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &llvm::operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e153842..787a81a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5044,7 +5044,6 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
- ConstantSDNode *N1C = isConstOrConstSplat(N1);
// X / undef -> undef
// X % undef -> undef
@@ -5076,7 +5075,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
// TODO: Similarly, if we're zero-extending a boolean divisor, then assume
// it's a 1.
- if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
+ if (isOneOrOneSplat(N1) || (VT.getScalarType() == MVT::i1))
return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c9aeef7..90edaf3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5063,8 +5063,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::ADD:
case ISD::ADDC:
- // Add can have at most one carry bit. Thus we know that the output
- // is, at worst, one more bit than the inputs.
+ // TODO: Move Operand 1 check before Operand 0 check
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
if (Tmp == 1) return 1; // Early out.
@@ -5088,6 +5087,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
if (Tmp2 == 1) return 1; // Early out.
+
+ // Add can have at most one carry bit. Thus we know that the output
+ // is, at worst, one more bit than the inputs.
return std::min(Tmp, Tmp2) - 1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -6403,8 +6405,9 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
if (VT.isScalableVector())
return SDValue();
- // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
- // simplified to one big BUILD_VECTOR.
+ // A CONCAT_VECTOR of scalar sources, such as UNDEF, BUILD_VECTOR and
+ // single-element INSERT_VECTOR_ELT operands can be simplified to one big
+ // BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 16> Elts;
@@ -6414,6 +6417,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT));
else if (Op.getOpcode() == ISD::BUILD_VECTOR)
Elts.append(Op->op_begin(), Op->op_end());
+ else if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ OpVT.getVectorNumElements() == 1 &&
+ isNullConstant(Op.getOperand(2)))
+ Elts.push_back(Op.getOperand(1));
else
return SDValue();
}
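Illustration of the new INSERT_VECTOR_ELT case (node notation; the types are illustrative): a concatenation of single-element inserts at index 0 now flattens the same way BUILD_VECTOR operands already did:

    concat_vectors (insert_vector_elt undef:v1f32, X, 0),
                   (insert_vector_elt undef:v1f32, Y, 0)
      -->  build_vector X, Y    ; one v2f32 BUILD_VECTOR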
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 3842b1a..6a9ef2e 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -741,7 +741,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
assert(!CI2->isZero() && "And zero handled above");
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
// If and'ing the address of a global with a constant, fold it.
- if (CE1->getOpcode() == Instruction::PtrToInt &&
+ if ((CE1->getOpcode() == Instruction::PtrToInt ||
+ CE1->getOpcode() == Instruction::PtrToAddr) &&
isa<GlobalValue>(CE1->getOperand(0))) {
GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
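The effect, for illustration (hypothetical global; the fold itself is performed by the surrounding code using the global's known alignment, not shown in this hunk): with an 8-byte-aligned @g, the low address bits are known zero, so

    and (ptrtoaddr @g), 7  -->  0

now folds exactly as the equivalent ptrtoint expression already did.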
diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp
index eef8a21..6b7b4f1 100644
--- a/llvm/lib/ObjCopy/ConfigManager.cpp
+++ b/llvm/lib/ObjCopy/ConfigManager.cpp
@@ -122,14 +122,14 @@ ConfigManager::getDXContainerConfig() const {
if (!Common.AddGnuDebugLink.empty() || !Common.SplitDWO.empty() ||
!Common.AllocSectionsPrefix.empty() ||
Common.DiscardMode != DiscardType::None || !Common.AddSection.empty() ||
- !Common.DumpSection.empty() || !Common.KeepSection.empty() ||
- !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- !Common.SetSectionFlags.empty() || !Common.SetSectionType.empty() ||
- Common.ExtractDWO || Common.OnlyKeepDebug || Common.StripAllGNU ||
- Common.StripDWO || Common.StripDebug || Common.StripNonAlloc ||
- Common.StripSections || Common.StripUnneeded ||
- Common.DecompressDebugSections || Common.GapFill != 0 ||
- Common.PadTo != 0 || Common.ChangeSectionLMAValAll != 0 ||
+ !Common.KeepSection.empty() || !Common.SectionsToRename.empty() ||
+ !Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
+ !Common.SetSectionType.empty() || Common.ExtractDWO ||
+ Common.OnlyKeepDebug || Common.StripAllGNU || Common.StripDWO ||
+ Common.StripDebug || Common.StripNonAlloc || Common.StripSections ||
+ Common.StripUnneeded || Common.DecompressDebugSections ||
+ Common.GapFill != 0 || Common.PadTo != 0 ||
+ Common.ChangeSectionLMAValAll != 0 ||
!Common.ChangeSectionAddress.empty()) {
return createStringError(llvm::errc::invalid_argument,
"option is not supported for DXContainer");
diff --git a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp
index d7f3c0d..95ab3d9 100644
--- a/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp
+++ b/llvm/lib/ObjCopy/DXContainer/DXContainerObjcopy.cpp
@@ -9,8 +9,10 @@
#include "llvm/ObjCopy/DXContainer/DXContainerObjcopy.h"
#include "DXContainerReader.h"
#include "DXContainerWriter.h"
+#include "llvm/BinaryFormat/DXContainer.h"
#include "llvm/ObjCopy/CommonConfig.h"
#include "llvm/ObjCopy/DXContainer/DXContainerConfig.h"
+#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -42,7 +44,47 @@ static Error extractPartAsObject(StringRef PartName, StringRef OutFilename,
"part '%s' not found", PartName.str().c_str());
}
+static Error dumpPartToFile(StringRef PartName, StringRef Filename,
+ StringRef InputFilename, Object &Obj) {
+ auto PartIter = llvm::find_if(
+ Obj.Parts, [&PartName](const Part &P) { return P.Name == PartName; });
+ if (PartIter == Obj.Parts.end())
+ return createFileError(Filename,
+ std::make_error_code(std::errc::invalid_argument),
+ "part '%s' not found", PartName.str().c_str());
+ ArrayRef<uint8_t> Contents = PartIter->Data;
+ // The DXContainer format is a bit odd because the part-specific headers are
+ // contained inside the part data itself. For parts that contain LLVM
+ // bitcode, we want to skip the part-specific header when dumping so that we
+ // get a valid .bc file that we can inspect. All the data contained inside the
+ // program header is pulled out of the bitcode, so the header can be
+ // reconstructed if needed from the bitcode itself. More comprehensive
+ // documentation on the DXContainer format can be found at
+ // https://llvm.org/docs/DirectX/DXContainer.html.
+
+ if (PartName == "DXIL" || PartName == "STAT")
+ Contents = Contents.drop_front(sizeof(llvm::dxbc::ProgramHeader));
+ if (Contents.empty())
+ return createFileError(Filename, object_error::parse_failed,
+ "part '%s' is empty", PartName.str().c_str());
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(Filename, Contents.size());
+ if (!BufferOrErr)
+ return createFileError(Filename, BufferOrErr.takeError());
+ std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+ llvm::copy(Contents, Buf->getBufferStart());
+ if (Error E = Buf->commit())
+ return createFileError(Filename, std::move(E));
+ return Error::success();
+}
+
static Error handleArgs(const CommonConfig &Config, Object &Obj) {
+ for (StringRef Flag : Config.DumpSection) {
+ auto [SecName, FileName] = Flag.split("=");
+ if (Error E = dumpPartToFile(SecName, FileName, Config.InputFilename, Obj))
+ return E;
+ }
+
// Extract all sections before any modifications.
for (StringRef Flag : Config.ExtractSection) {
StringRef SectionName;
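Usage sketch for the new loop above (file names hypothetical): each --dump-section value is split on '=' into a part name and an output path, so dumping the DXIL part of a container looks like:

    llvm-objcopy --dump-section=DXIL=shader.bc input.dxbc output.dxbc

Because the DXIL and STAT program headers are dropped before writing, shader.bc should be loadable as ordinary LLVM bitcode.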
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 53699ce0..f256e7b 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -837,7 +837,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
Version = Data.getU8(Cur);
if (!Cur)
break;
- if (Version < 2 || Version > 3)
+ if (Version < 2 || Version > 4)
return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " +
Twine(static_cast<int>(Version)));
Feature = Data.getU8(Cur); // Feature byte
@@ -852,6 +852,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
"callsite offsets feature is enabled: version = " +
Twine(static_cast<int>(Version)) +
" feature = " + Twine(static_cast<int>(Feature)));
+ if (FeatEnable.BBHash && Version < 4)
+ return createError("version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when "
+ "basic block hash feature is enabled: version = " +
+ Twine(static_cast<int>(Version)) +
+ " feature = " + Twine(static_cast<int>(Feature)));
uint32_t NumBlocksInBBRange = 0;
uint32_t NumBBRanges = 1;
typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0;
@@ -907,6 +912,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
uint32_t Size = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) +
LastCallsiteEndOffset;
uint32_t MD = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ uint64_t Hash = FeatEnable.BBHash ? Data.getU64(Cur) : 0;
Expected<BBAddrMap::BBEntry::Metadata> MetadataOrErr =
BBAddrMap::BBEntry::Metadata::decode(MD);
if (!MetadataOrErr) {
@@ -914,7 +920,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
break;
}
BBEntries.push_back({ID, Offset + PrevBBEndOffset, Size,
- *MetadataOrErr, CallsiteEndOffsets});
+ *MetadataOrErr, CallsiteEndOffsets, Hash});
PrevBBEndOffset += Offset + Size;
}
TotalNumBlocks += BBEntries.size();
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index faeeab3..8b75fbe 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1465,7 +1465,7 @@ void ELFState<ELFT>::writeSectionContent(
for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) {
// Write version and feature values.
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) {
- if (E.Version > 3)
+ if (E.Version > 4)
WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: "
<< static_cast<int>(E.Version)
<< "; encoding using the most recent version";
@@ -1526,6 +1526,12 @@ void ELFState<ELFT>::writeSectionContent(
}
SHeader.sh_size += CBA.writeULEB128(BBE.Size);
SHeader.sh_size += CBA.writeULEB128(BBE.Metadata);
+ if (FeatureOrErr->BBHash || BBE.Hash.has_value()) {
+ uint64_t Hash =
+ BBE.Hash.has_value() ? BBE.Hash.value() : llvm::yaml::Hex64(0);
+ CBA.write<uint64_t>(Hash, ELFT::Endianness);
+ SHeader.sh_size += 8;
+ }
}
}
if (!PGOAnalyses)
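Net on-disk effect of the emitter and parser changes (a sketch covering only the fields visible in these hunks): each BBEntry still ends with its ULEB128 Size and Metadata, and when feature bit 6 (BBHash) is set, a fixed-width u64 hash follows:

    ..., Size : ULEB128, Metadata : ULEB128, Hash : u64 (only with BBHash)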
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index d9cce1e..421d6603 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1887,6 +1887,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry>::mapping(
IO.mapRequired("Size", E.Size);
IO.mapRequired("Metadata", E.Metadata);
IO.mapOptional("CallsiteEndOffsets", E.CallsiteEndOffsets);
+ IO.mapOptional("Hash", E.Hash);
}
void MappingTraits<ELFYAML::PGOAnalysisMapEntry>::mapping(
diff --git a/llvm/lib/Support/TextEncoding.cpp b/llvm/lib/Support/TextEncoding.cpp
index 804ff07..41f5187 100644
--- a/llvm/lib/Support/TextEncoding.cpp
+++ b/llvm/lib/Support/TextEncoding.cpp
@@ -54,9 +54,9 @@ static std::optional<TextEncoding> getKnownEncoding(StringRef Name) {
return std::nullopt;
}
-LLVM_ATTRIBUTE_UNUSED static void
-HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength,
- SmallVectorImpl<char> &Result) {
+[[maybe_unused]] static void HandleOverflow(size_t &Capacity, char *&Output,
+ size_t &OutputLength,
+ SmallVectorImpl<char> &Result) {
// No space left in output buffer. Double the size of the underlying
// memory in the SmallVectorImpl, adjust pointer and length and continue
// the conversion.
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
index 8d66348..6f8e091 100644
--- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp
+++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
@@ -476,7 +476,7 @@ nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) {
std::min(NormalizedName.size(), UnicodeNameToCodepointLargestNameSize) +
1;
- LLVM_ATTRIBUTE_UNUSED static std::size_t Rows =
+ [[maybe_unused]] static std::size_t Rows =
UnicodeNameToCodepointLargestNameSize + 1;
std::vector<char> Distances(
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 2c3870c..636d4f8a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -8217,6 +8217,8 @@ bool AArch64AsmParser::parseDataExpr(const MCExpr *&Res) {
Spec = AArch64::S_GOTPCREL;
else if (Identifier == "plt")
Spec = AArch64::S_PLT;
+ else if (Identifier == "funcinit")
+ Spec = AArch64::S_FUNCINIT;
}
if (Spec == AArch64::S_None)
return Error(Loc, "invalid relocation specifier");
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a388216..892b8da 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -232,6 +232,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(const MCFixup &Fixup,
}
if (RefKind == AArch64::S_AUTH || RefKind == AArch64::S_AUTHADDR)
return ELF::R_AARCH64_AUTH_ABS64;
+ if (RefKind == AArch64::S_FUNCINIT)
+ return ELF::R_AARCH64_FUNCINIT64;
return ELF::R_AARCH64_ABS64;
}
case AArch64::fixup_aarch64_add_imm12:
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 2b5cf34..bc090c6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -40,6 +40,7 @@ const MCAsmInfo::AtSpecifier ELFAtSpecifiers[] = {
{AArch64::S_GOT, "GOT"},
{AArch64::S_GOTPCREL, "GOTPCREL"},
{AArch64::S_PLT, "PLT"},
+ {AArch64::S_FUNCINIT, "FUNCINIT"},
};
const MCAsmInfo::AtSpecifier MachOAtSpecifiers[] = {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 0dfa61b..f2acff5 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -164,6 +164,7 @@ enum {
// ELF relocation specifiers in data directives:
S_PLT = 0x400,
S_GOTPCREL,
+ S_FUNCINIT,
// Mach-O @ relocation specifiers:
S_MACHO_GOT,
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index dd6fa16..d71f728 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -130,6 +130,12 @@ SMECallAttrs::SMECallAttrs(const CallBase &CB, const AArch64TargetLowering *TLI)
if (auto *CalledFunction = CB.getCalledFunction())
CalledFn = SMEAttrs(*CalledFunction, TLI);
+ // An `invoke` of an agnostic ZA function may not return normally (it may
+ // resume in an exception block). In this case, it acts like a private ZA
+ // callee and may require a ZA save to be set up before it is called.
+ if (isa<InvokeInst>(CB))
+ CalledFn.set(SMEAttrs::ZA_State_Agnostic, /*Enable=*/false);
+
// FIXME: We probably should not allow SME attributes on direct calls but
// clang duplicates streaming mode attributes at each callsite.
assert((IsIndirect ||
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 80e985d..a2841c11 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18168,7 +18168,7 @@ Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
return CacheLineAlign;
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
assert(N->getOpcode() == ISD::CopyFromReg);
do {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a44a247..d516330 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -865,22 +865,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (DestReg == AMDGPU::VCC_LO) {
- if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
+ if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+ if (DestReg == AMDGPU::VCC_LO) {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
- .addImm(0)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- return;
- }
-
- if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
@@ -898,22 +892,16 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (DestReg == AMDGPU::VCC) {
- if (AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
+ if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
+ if (DestReg == AMDGPU::VCC) {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
- .addImm(0)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- return;
- }
-
- if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
index 39e651d..8945ec3 100644
--- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
+++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
@@ -166,7 +166,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
}
// TODO
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
static DecodeStatus DecodesFPR128RegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 974f653..5f180d6 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -667,11 +667,10 @@ static DecodeStatus DecodeHvxWRRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo, HvxWRDecoderTable);
}
-LLVM_ATTRIBUTE_UNUSED // Suppress warning temporarily.
- static DecodeStatus
- DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const MCDisassembler *Decoder) {
+[[maybe_unused]] // Suppress warning temporarily.
+static DecodeStatus DecodeHvxVQRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const MCDisassembler *Decoder) {
static const MCPhysReg HvxVQRDecoderTable[] = {
Hexagon::VQ0, Hexagon::VQ1, Hexagon::VQ2, Hexagon::VQ3,
Hexagon::VQ4, Hexagon::VQ5, Hexagon::VQ6, Hexagon::VQ7};
diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index 5dde47a..a3296e0 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -419,8 +419,8 @@ namespace {
using HCE = HexagonConstExtenders;
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const OffsetRange &OR) {
if (OR.Min > OR.Max)
OS << '!';
OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align)
@@ -435,8 +435,8 @@ namespace {
const HexagonRegisterInfo &HRI;
};
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &P) {
if (P.Rs.Reg != 0)
OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub);
else
@@ -451,8 +451,8 @@ namespace {
const HexagonRegisterInfo &HRI;
};
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const PrintExpr &P) {
OS << "## " << (P.Ex.Neg ? "- " : "+ ");
if (P.Ex.Rs.Reg != 0)
OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub);
@@ -469,15 +469,15 @@ namespace {
const HexagonRegisterInfo &HRI;
};
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const PrintInit &P) {
OS << '[' << P.ExtI.first << ", "
<< PrintExpr(P.ExtI.second, P.HRI) << ']';
return OS;
}
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtDesc &ED) {
assert(ED.OpNum != -1u);
const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent();
const MachineFunction &MF = *MBB.getParent();
@@ -493,8 +493,8 @@ namespace {
return OS;
}
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtRoot &ER) {
switch (ER.Kind) {
case MachineOperand::MO_Immediate:
OS << "imm:" << ER.V.ImmVal;
@@ -527,8 +527,8 @@ namespace {
return OS;
}
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const HCE::ExtValue &EV) {
OS << HCE::ExtRoot(EV) << " off:" << EV.Offset;
return OS;
}
@@ -540,8 +540,8 @@ namespace {
const HexagonRegisterInfo &HRI;
};
- LLVM_ATTRIBUTE_UNUSED
- raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) {
+ [[maybe_unused]]
+ raw_ostream &operator<<(raw_ostream &OS, const PrintIMap &P) {
OS << "{\n";
for (const std::pair<const HCE::ExtenderInit, HCE::IndexList> &Q : P.IMap) {
OS << " " << PrintInit(Q.first, P.HRI) << " -> {";
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 4d96cfa..c7a4f68 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -789,7 +789,7 @@ struct ShuffleMask {
}
};
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
SM.print(OS);
return OS;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 87d052b..e4c0a16 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -364,7 +364,7 @@ private:
const HexagonVectorCombine &HVC;
};
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
OS << "Addr: " << *AI.Addr << '\n';
@@ -375,7 +375,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
return OS;
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
@@ -394,7 +394,7 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
return OS;
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &OS,
const AlignVectors::ByteSpan::Block &B) {
OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
@@ -408,7 +408,7 @@ raw_ostream &operator<<(raw_ostream &OS,
return OS;
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
for (const AlignVectors::ByteSpan::Block &B : BS)
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index fa8ae60..2ff5843 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -111,7 +111,7 @@ namespace {
friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
};
- LLVM_ATTRIBUTE_UNUSED
+ [[maybe_unused]]
raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
const ChainOfDependences &CD = D.Chain;
int ChainSize = CD.size();
@@ -144,7 +144,7 @@ namespace {
bool isDefined() { return Inst2Replace != nullptr; }
};
- LLVM_ATTRIBUTE_UNUSED
+ [[maybe_unused]]
raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
OS << "** ReuseValue ***\n";
OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index ca98269..e3094b4 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -275,7 +275,7 @@ namespace HexagonII {
INST_ICLASS_ALU32_3 = 0xf0000000
};
- LLVM_ATTRIBUTE_UNUSED
+ [[maybe_unused]]
static unsigned getMemAccessSizeInBytes(MemAccessSize S) {
switch (S) {
case ByteAccess: return 1;
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 96ad5c6..0a8838c 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -156,13 +156,13 @@ FunctionPass *llvm::createRISCVVLOptimizerPass() {
return new RISCVVLOptimizer();
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
static raw_ostream &operator<<(raw_ostream &OS, const OperandInfo &OI) {
OI.print(OS);
return OS;
}
-LLVM_ATTRIBUTE_UNUSED
+[[maybe_unused]]
static raw_ostream &operator<<(raw_ostream &OS,
const std::optional<OperandInfo> &OI) {
if (OI)
diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h
index a0cf881..5a06ea3 100644
--- a/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/llvm/lib/Target/SystemZ/SystemZ.h
@@ -24,6 +24,7 @@ class SystemZTargetMachine;
namespace SystemZ {
// Condition-code mask values.
+const unsigned CCMASK_NONE = 0;
const unsigned CCMASK_0 = 1 << 3;
const unsigned CCMASK_1 = 1 << 2;
const unsigned CCMASK_2 = 1 << 1;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3b7d11a..de28faf 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -15,6 +15,7 @@
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -24,6 +25,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -1514,6 +1516,9 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
}
+ } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
+ if (StringRef("{@cc}").compare(Constraint) == 0)
+ return C_Other;
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -1707,6 +1712,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
SystemZMC::VR128Regs, 32);
}
+ if (Constraint[1] == '@') {
+ if (StringRef("{@cc}").compare(Constraint) == 0)
+ return std::make_pair(0u, &SystemZ::GR32BitRegClass);
+ }
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
@@ -1737,6 +1746,38 @@ Register SystemZTargetLowering::getExceptionSelectorRegister(
return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
}
+// Convert condition code in CCReg to an i32 value.
+static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
+ SDLoc DL(CCReg);
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
+ return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
+}
+
+// Lower @cc targets via setcc.
+SDValue SystemZTargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+ if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
+ return SDValue();
+
+ // Check that return type is valid.
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+ OpInfo.ConstraintVT.getSizeInBits() < 8)
+ report_fatal_error("Glue output operand is of invalid type");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(SystemZ::CC);
+
+ if (Glue.getNode()) {
+ Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
+ Chain = Glue.getValue(1);
+ } else
+ Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
+ return getCCResult(DAG, Glue);
+}
+
void SystemZTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
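For reference, what this hunk wires up is the inline-asm flag-output constraint that clang spells "=@cc" on SystemZ; a minimal source-level sketch (the mnemonic and helper are illustrative, not from this commit):

    // Returns the raw condition code (0..3) set by a 32-bit add; the
    // backend materializes it with IPM + SRL as in getCCResult above.
    int add_cc(int a, int b, int &sum) {
      int cc;
      asm("ar %[s],%[b]"                // 32-bit add, sets the CC
          : [s] "=r"(sum), "=@cc"(cc)
          : "0"(a), [b] "r"(b));
      return cc;
    }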
@@ -5300,14 +5341,6 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
Node->getMemoryVT(), Node->getMemOperand());
}
-// Convert condition code in CCReg to an i32 value.
-static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
- SDLoc DL(CCReg);
- SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
- return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
- DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
-}
-
SDValue
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
@@ -8723,95 +8756,247 @@ SDValue SystemZTargetLowering::combineSETCC(
return SDValue();
}
-static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
+static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
+ switch (Val.getOpcode()) {
+ default:
+ return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
+ case SystemZISD::IPM:
+ if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
+ Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
+ return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
+ return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
+ case SystemZISD::SELECT_CCMASK: {
+ SDValue Op4CCReg = Val.getOperand(4);
+ if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
+ Op4CCReg.getOpcode() == SystemZISD::TM) {
+ auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
+ if (OpCC != SDValue())
+ return std::make_pair(OpCC, OpCCValid);
+ }
+ auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
+ if (!CCValid)
+ return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
+ int CCValidVal = CCValid->getZExtValue();
+ return std::make_pair(Op4CCReg, CCValidVal);
+ }
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
+ if (Op0CC != SDValue())
+ return std::make_pair(Op0CC, Op0CCValid);
+ return findCCUse(Val.getOperand(1));
+ }
+}
+
+static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
+ SelectionDAG &DAG);
+
+SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC,
+ SelectionDAG &DAG) {
+ SDLoc DL(Val);
+ auto Opcode = Val.getOpcode();
+ switch (Opcode) {
+ default:
+ return {};
+ case ISD::Constant:
+ return {Val, Val, Val, Val};
+ case SystemZISD::IPM: {
+ SDValue IPMOp0 = Val.getOperand(0);
+ if (IPMOp0 != CC)
+ return {};
+ SmallVector<SDValue, 4> ShiftedCCVals;
+ for (auto CC : {0, 1, 2, 3})
+ ShiftedCCVals.emplace_back(
+ DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
+ return ShiftedCCVals;
+ }
+ case SystemZISD::SELECT_CCMASK: {
+ SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+ auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
+ auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+ if (!CCValid || !CCMask)
+ return {};
+
+ int CCValidVal = CCValid->getZExtValue();
+ int CCMaskVal = CCMask->getZExtValue();
+ const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
+ const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
+ if (TrueSDVals.empty() || FalseSDVals.empty())
+ return {};
+ SDValue Op4CCReg = Val.getOperand(4);
+ if (Op4CCReg != CC)
+ combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
+ if (Op4CCReg != CC)
+ return {};
+ SmallVector<SDValue, 4> MergedSDVals;
+ for (auto &CCVal : {0, 1, 2, 3})
+ MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
+ ? TrueSDVals[CCVal]
+ : FalseSDVals[CCVal]);
+ return MergedSDVals;
+ }
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SRA:
+ // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
+ // would clobber CC).
+ if (!Val.hasOneUse())
+ return {};
+ [[fallthrough]];
+ case ISD::SHL:
+ case ISD::SRL:
+ SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
+ const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
+ const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
+ if (Op0SDVals.empty() || Op1SDVals.empty())
+ return {};
+ SmallVector<SDValue, 4> BinaryOpSDVals;
+ for (auto CCVal : {0, 1, 2, 3})
+ BinaryOpSDVals.emplace_back(DAG.getNode(
+ Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
+ return BinaryOpSDVals;
+ }
+}
+
+static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
+ SelectionDAG &DAG) {
// We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
// set by the CCReg instruction using the CCValid / CCMask masks,
- // If the CCReg instruction is itself a ICMP testing the condition
+  // If the CCReg instruction is itself an ICMP / TM testing the condition
// code set by some other instruction, see whether we can directly
// use that condition code.
-
- // Verify that we have an ICMP against some constant.
- if (CCValid != SystemZ::CCMASK_ICMP)
- return false;
- auto *ICmp = CCReg.getNode();
- if (ICmp->getOpcode() != SystemZISD::ICMP)
- return false;
- auto *CompareLHS = ICmp->getOperand(0).getNode();
- auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
- if (!CompareRHS)
+ auto *CCNode = CCReg.getNode();
+ if (!CCNode)
return false;
- // Optimize the case where CompareLHS is a SELECT_CCMASK.
- if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
- // Verify that we have an appropriate mask for a EQ or NE comparison.
- bool Invert = false;
- if (CCMask == SystemZ::CCMASK_CMP_NE)
- Invert = !Invert;
- else if (CCMask != SystemZ::CCMASK_CMP_EQ)
+ if (CCNode->getOpcode() == SystemZISD::TM) {
+ if (CCValid != SystemZ::CCMASK_TM)
return false;
-
- // Verify that the ICMP compares against one of select values.
- auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
- if (!TrueVal)
- return false;
- auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
- if (!FalseVal)
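+    // Emulate the CC produced by TEST UNDER MASK for constant operands:
+    // CC 0 if all selected bits are zero, CC 3 if all are one, CC 2 if
+    // they are mixed and the leftmost selected bit is one, and CC 1 if
+    // they are mixed and the leftmost selected bit is zero.  Returns -1
+    // if either operand is not a constant.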
+ auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
+ auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
+ auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
+ if (!Op0Node || !Op1Node)
+ return -1;
+ auto Op0APVal = Op0Node->getAPIntValue();
+ auto Op1APVal = Op1Node->getAPIntValue();
+ auto Result = Op0APVal & Op1APVal;
+ bool AllOnes = Result == Op1APVal;
+ bool AllZeros = Result == 0;
+      bool IsLeftMostBitSet =
+          Op1APVal.getActiveBits() > 0 && Result[Op1APVal.getActiveBits() - 1];
+ return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
+ };
+ SDValue Op0 = CCNode->getOperand(0);
+ SDValue Op1 = CCNode->getOperand(1);
+ auto [Op0CC, Op0CCValid] = findCCUse(Op0);
+ if (Op0CC == SDValue())
return false;
- if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
- Invert = !Invert;
- else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
+ const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
+ const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
+ if (Op0SDVals.empty() || Op1SDVals.empty())
return false;
-
- // Compute the effective CC mask for the new branch or select.
- auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
- auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
- if (!NewCCValid || !NewCCMask)
- return false;
- CCValid = NewCCValid->getZExtValue();
- CCMask = NewCCMask->getZExtValue();
- if (Invert)
- CCMask ^= CCValid;
-
- // Return the updated CCReg link.
- CCReg = CompareLHS->getOperand(4);
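+    // Rebuild the mask in terms of the CC values produced by Op0CC: bit
+    // (3 - CC) of the new mask is set iff the original mask selects the
+    // TM result emulated for that CC value.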
+ int NewCCMask = 0;
+ for (auto CC : {0, 1, 2, 3}) {
+ auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
+ if (CCVal < 0)
+ return false;
+ NewCCMask <<= 1;
+ NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+ }
+ NewCCMask &= Op0CCValid;
+ CCReg = Op0CC;
+ CCMask = NewCCMask;
+ CCValid = Op0CCValid;
return true;
}
+ if (CCNode->getOpcode() != SystemZISD::ICMP ||
+ CCValid != SystemZ::CCMASK_ICMP)
+ return false;
- // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
- if (CompareLHS->getOpcode() == ISD::SRA) {
- auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
- if (!SRACount || SRACount->getZExtValue() != 30)
- return false;
- auto *SHL = CompareLHS->getOperand(0).getNode();
- if (SHL->getOpcode() != ISD::SHL)
- return false;
- auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
- if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
- return false;
- auto *IPM = SHL->getOperand(0).getNode();
- if (IPM->getOpcode() != SystemZISD::IPM)
- return false;
-
- // Avoid introducing CC spills (because SRA would clobber CC).
- if (!CompareLHS->hasOneUse())
- return false;
- // Verify that the ICMP compares against zero.
- if (CompareRHS->getZExtValue() != 0)
+ SDValue CmpOp0 = CCNode->getOperand(0);
+ SDValue CmpOp1 = CCNode->getOperand(1);
+ SDValue CmpOp2 = CCNode->getOperand(2);
+ auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
+ if (Op0CC != SDValue()) {
+ const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
+ const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
+ if (Op0SDVals.empty() || Op1SDVals.empty())
return false;
- // Compute the effective CC mask for the new branch or select.
- CCMask = SystemZ::reverseCCMask(CCMask);
-
- // Return the updated CCReg link.
- CCReg = IPM->getOperand(0);
+    auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
+    if (!CmpType)
+      return false;
+    auto CmpTypeVal = CmpType->getZExtValue();
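+    // Compare two constant operands as the ICMP would (signed or unsigned
+    // per CmpTypeVal), returning the CC value produced: 0 if equal, 1 if
+    // the first operand is lower, 2 if higher; -1 for non-constants.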
+ const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
+ const SDValue &Op1Val) {
+ auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
+ auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
+ if (!Op0Node || !Op1Node)
+ return -1;
+ auto Op0APVal = Op0Node->getAPIntValue();
+ auto Op1APVal = Op1Node->getAPIntValue();
+ if (CmpTypeVal == SystemZICMP::SignedOnly)
+ return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
+ return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
+ };
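+    // As in the TM case, rebuild the mask in terms of the CC values
+    // produced by Op0CC.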
+ int NewCCMask = 0;
+ for (auto CC : {0, 1, 2, 3}) {
+ auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
+ if (CCVal < 0)
+ return false;
+ NewCCMask <<= 1;
+ NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
+ }
+ NewCCMask &= Op0CCValid;
+ CCMask = NewCCMask;
+ CCReg = Op0CC;
+ CCValid = Op0CCValid;
return true;
}
return false;
}
-SDValue SystemZTargetLowering::combineBR_CCMASK(
- SDNode *N, DAGCombinerInfo &DCI) const {
+// Decide between merging conditions into a single branch and splitting
+// them across multiple branches, based on cost.
+TargetLoweringBase::CondMergingParams
+SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
+ const Value *Lhs,
+ const Value *Rhs) const {
+ const auto isFlagOutOpCC = [](const Value *V) {
+ using namespace llvm::PatternMatch;
+ const Value *RHSVal;
+ const APInt *RHSC;
+ if (const auto *I = dyn_cast<Instruction>(V)) {
+      // PatternMatch.h provides concise tree-based pattern matching on
+      // LLVM IR.
+ if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
+ match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
+ if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
+ if (CB->isInlineAsm()) {
+ const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
+ return IA &&
+ IA->getConstraintString().find("{@cc}") != std::string::npos;
+ }
+ }
+ }
+ }
+ return false;
+ };
+  // The flag-output pattern is (ICmp %asm) or (ICmp (And %asm)), so the
+  // longest dependency chain (And, ICmp) has cost 2, and CostThreshold or
+  // BaseCost can be set >= 2. Conditionals are merged when the instruction
+  // cost is <= CostThreshold, and split otherwise.
+ if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
+ return {3, 0, -1};
+ // Default.
+ return {-1, -1, -1};
+}
+
+SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
+ DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
@@ -8824,8 +9009,7 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
int CCMaskVal = CCMask->getZExtValue();
SDValue Chain = N->getOperand(0);
SDValue CCReg = N->getOperand(4);
-
- if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
+ if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
Chain,
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
@@ -8848,16 +9032,80 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
int CCMaskVal = CCMask->getZExtValue();
SDValue CCReg = N->getOperand(4);
- if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
- return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1),
- DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
- DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
- CCReg);
+ bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
+
+  // Populate the SDVals vector with the value Val takes for each condition
+  // code; Val may itself be a nested SELECT_CCMASK on the same CC.
+ const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
+ if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
+ SmallVector<SDValue, 4> Res;
+ if (Val.getOperand(4) != CCReg)
+ return SmallVector<SDValue, 4>{};
+ SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
+ auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
+ if (!CCMask)
+ return SmallVector<SDValue, 4>{};
+
+ int CCMaskVal = CCMask->getZExtValue();
+      for (auto CC : {0, 1, 2, 3})
+ Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
+ : FalseVal);
+ return Res;
+ }
+ return SmallVector<SDValue, 4>{Val, Val, Val, Val};
+ };
+  // Attempt to optimize TrueVal/FalseVal of the outermost SELECT_CCMASK,
+  // using either the CCReg found by combineCCMask or the original CCReg.
+ SDValue TrueVal = N->getOperand(0);
+ SDValue FalseVal = N->getOperand(1);
+ auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
+ auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
+  // TrueSDVals/FalseSDVals may be empty if TrueVal/FalseVal is a
+  // non-constant operand of the SELECT_CCMASK, which cannot be optimized
+  // further.
+ if (TrueSDVals.empty())
+ TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
+ if (FalseSDVals.empty())
+ FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
+ if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
+ SmallSet<SDValue, 4> MergedSDValsSet;
+    // Ignore CC values outside CCValid.
+ for (auto CC : {0, 1, 2, 3}) {
+ if ((CCValidVal & ((1 << (3 - CC)))) != 0)
+ MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
+ ? TrueSDVals[CC]
+ : FalseSDVals[CC]);
+ }
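+    // If every valid CC selects the same value the select is redundant;
+    // with exactly two distinct values, rebuild the select around them
+    // with a recomputed CC mask.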
+ if (MergedSDValsSet.size() == 1)
+ return *MergedSDValsSet.begin();
+ if (MergedSDValsSet.size() == 2) {
+ auto BeginIt = MergedSDValsSet.begin();
+      SDValue NewTrueVal = *BeginIt, NewFalseVal = *std::next(BeginIt);
+ if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
+ std::swap(NewTrueVal, NewFalseVal);
+ int NewCCMask = 0;
+ for (auto CC : {0, 1, 2, 3}) {
+ NewCCMask <<= 1;
+ NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
+ ? (TrueSDVals[CC] == NewTrueVal)
+ : (FalseSDVals[CC] == NewTrueVal);
+ }
+ CCMaskVal = NewCCMask;
+ CCMaskVal &= CCValidVal;
+ TrueVal = NewTrueVal;
+ FalseVal = NewFalseVal;
+ IsCombinedCCReg = true;
+ }
+ }
+
+ if (IsCombinedCCReg)
+ return DAG.getNode(
+ SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
+ FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
+ DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
+
return SDValue();
}
-
SDValue SystemZTargetLowering::combineGET_CCMASK(
SDNode *N, DAGCombinerInfo &DCI) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index f8706b7..d5b7603 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -533,6 +533,18 @@ public:
}
const char *getTargetNodeName(unsigned Opcode) const override;
+
+  // Returns the cost parameters used to decide whether the srl/ipm/cc
+  // sequence produced for flag outputs should be merged into one branch.
+ CondMergingParams
+ getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
+ const Value *Rhs) const override;
+
+  // Lower the flag ("@cc") output of an inline-assembly operand.
+ SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+ const SDLoc &DL,
+ const AsmOperandInfo &Constraint,
+ SelectionDAG &DAG) const override;
+
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 28fa2cd..b81641f 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -414,6 +414,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+ getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
+
// fp constants
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64})
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 3af8b3e..2bf016a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1335,9 +1335,8 @@ def ProcessorFeatures {
!listconcat(ARLFeatures, ARLSAdditionalFeatures);
// Pantherlake
- list<SubtargetFeature> PTLAdditionalFeatures = [FeaturePREFETCHI];
list<SubtargetFeature> PTLFeatures =
- !listremove(!listconcat(ARLSFeatures, PTLAdditionalFeatures), [FeatureWIDEKL]);
+ !listremove(ARLSFeatures, [FeatureWIDEKL]);
// Clearwaterforest
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eea84a2..c32b1a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
BaseCost += 1;
+
+ // For OR conditions with EQ comparisons, prefer splitting into branches
+ // (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops,
+ // unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed
+ // comparisons (SLT, SGT) that can be optimized.
+ if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or &&
+ match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
+ match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
+ return {-1, -1, -1};
+
return {BaseCost, BrMergingLikelyBias.getValue(),
BrMergingUnlikelyBias.getValue()};
}
@@ -3787,7 +3797,7 @@ static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
/// Return true if every element in Mask, is an in-place blend/select mask or is
/// undef.
-LLVM_ATTRIBUTE_UNUSED static bool isBlendOrUndef(ArrayRef<int> Mask) {
+[[maybe_unused]] static bool isBlendOrUndef(ArrayRef<int> Mask) {
unsigned NumElts = Mask.size();
for (auto [I, M] : enumerate(Mask))
if (!isUndefOrEqual(M, I) && !isUndefOrEqual(M, I + NumElts))
@@ -8096,7 +8106,7 @@ static SDValue LowerBUILD_VECTORvXi1(SDValue Op, const SDLoc &dl,
return DstVec;
}
-LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) {
+[[maybe_unused]] static bool isHorizOp(unsigned Opcode) {
switch (Opcode) {
case X86ISD::PACKSS:
case X86ISD::PACKUS:
@@ -20813,7 +20823,7 @@ SDValue X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// for DAG type consistency we have to match the FP operand type.
APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
- LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
+ [[maybe_unused]] APFloat::opStatus Status = APFloat::opOK;
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
@@ -22856,7 +22866,7 @@ static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
// be generated by the memcmp expansion pass with oversized integer compares
// (see PR33325).
bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
- if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+ if (isNullConstant(Y) && OpSize == 128 && !IsOrXorXorTreeCCZero)
return SDValue();
// Don't perform this combine if constructing the vector will be expensive.
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index edca7c1..1932a3a 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -175,7 +175,7 @@ constexpr FeatureBitset FeaturesArrowlakeS =
FeaturesArrowlake | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
FeatureSM4;
constexpr FeatureBitset FeaturesPantherlake =
- (FeaturesArrowlakeS ^ FeatureWIDEKL) | FeaturePREFETCHI;
+ (FeaturesArrowlakeS ^ FeatureWIDEKL);
constexpr FeatureBitset FeaturesClearwaterforest =
(FeaturesSierraforest ^ FeatureWIDEKL) | FeatureAVXVNNIINT16 |
FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeaturePREFETCHI | FeatureUSERMSR;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 6b67b48..09cb225 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2979,10 +2979,14 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
"Op must be either i1 or vector of i1.");
if (SI.getCondition()->getType() != Op->getType())
return nullptr;
- if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL))
- return SelectInst::Create(Op,
- IsAnd ? V : ConstantInt::getTrue(Op->getType()),
- IsAnd ? ConstantInt::getFalse(Op->getType()) : V);
+ if (Value *V = simplifyNestedSelectsUsingImpliedCond(SI, Op, IsAnd, DL)) {
+ Instruction *MDFrom = nullptr;
+ if (!ProfcheckDisableMetadataFixes)
+ MDFrom = &SI;
+ return SelectInst::Create(
+ Op, IsAnd ? V : ConstantInt::getTrue(Op->getType()),
+ IsAnd ? ConstantInt::getFalse(Op->getType()) : V, "", nullptr, MDFrom);
+ }
return nullptr;
}
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index ff5f390..66e45ec 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -266,8 +266,7 @@ void DFAJumpThreading::unfold(DomTreeUpdater *DTU, LoopInfo *LI,
if (!ProfcheckDisableMetadataFixes)
BI->setMetadata(LLVMContext::MD_prof,
SI->getMetadata(LLVMContext::MD_prof));
- DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock},
- {DominatorTree::Insert, StartBlock, NewBlock}});
+ DTU->applyUpdates({{DominatorTree::Insert, StartBlock, NewBlock}});
} else {
BasicBlock *EndBlock = SIUse->getParent();
BasicBlock *NewBlockT = BasicBlock::Create(
@@ -1479,10 +1478,13 @@ bool DFAJumpThreading::run(Function &F) {
DTU->flush();
#ifdef EXPENSIVE_CHECKS
- assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full));
verifyFunction(F, &dbgs());
#endif
+ if (MadeChanges && VerifyDomInfo)
+ assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Full) &&
+ "Failed to maintain validity of domtree!");
+
return MadeChanges;
}
diff --git a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
index 213d0f3..1335665 100644
--- a/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -39,10 +39,11 @@ public:
private:
AliasAnalysis *AA;
};
+} // namespace
/// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function,
/// iterating until no more changes are made.
-bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
+static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
bool Changed = false;
bool LocalChange = true;
@@ -67,7 +68,6 @@ bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) {
}
return Changed;
}
-} // namespace
char FlattenCFGLegacyPass::ID = 0;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index b9d332b..d2f09e9 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -118,9 +118,13 @@ STATISTIC(
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
+namespace llvm {
/// Disable running mem2reg during SROA in order to test or debug SROA.
static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
cl::Hidden);
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+} // namespace llvm
+
namespace {
class AllocaSliceRewriter;
@@ -1777,7 +1781,8 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
}
Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
- LI.getName() + ".sroa.speculated");
+ LI.getName() + ".sroa.speculated",
+ ProfcheckDisableMetadataFixes ? nullptr : &SI);
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI.replaceAllUsesWith(V);
@@ -4360,10 +4365,13 @@ private:
};
Value *Cond, *True, *False;
+ Instruction *MDFrom = nullptr;
if (auto *SI = dyn_cast<SelectInst>(Sel)) {
Cond = SI->getCondition();
True = SI->getTrueValue();
False = SI->getFalseValue();
+ if (!ProfcheckDisableMetadataFixes)
+ MDFrom = SI;
} else {
Cond = Sel->getOperand(0);
True = ConstantInt::get(Sel->getType(), 1);
@@ -4383,8 +4391,12 @@ private:
IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
False->getName() + ".sroa.gep", NW);
- Value *NSel =
- IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel");
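+    // Copy profile metadata from the original select when available;
+    // otherwise annotate the new select as having unknown profile data.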
+ Value *NSel = MDFrom
+ ? IRB.CreateSelect(Cond, NTrue, NFalse,
+ Sel->getName() + ".sroa.sel", MDFrom)
+ : IRB.CreateSelectWithUnknownProfile(
+ Cond, NTrue, NFalse, DEBUG_TYPE,
+ Sel->getName() + ".sroa.sel");
Visited.erase(&GEPI);
GEPI.replaceAllUsesWith(NSel);
GEPI.eraseFromParent();
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 2ee91a9..0f3978f 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -47,6 +47,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
#include <cassert>
#include <utility>
@@ -321,7 +322,7 @@ class StructurizeCFG {
void collectInfos();
- void insertConditions(bool Loops);
+ void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
void simplifyConditions();
@@ -671,10 +672,9 @@ void StructurizeCFG::collectInfos() {
}
/// Insert the missing branch conditions
-void StructurizeCFG::insertConditions(bool Loops) {
+void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) {
BranchVector &Conds = Loops ? LoopConds : Conditions;
Value *Default = Loops ? BoolTrue : BoolFalse;
- SSAUpdater PhiInserter;
for (BranchInst *Term : Conds) {
assert(Term->isConditional());
@@ -683,8 +683,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
BasicBlock *SuccTrue = Term->getSuccessor(0);
BasicBlock *SuccFalse = Term->getSuccessor(1);
- PhiInserter.Initialize(Boolean, "");
- PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
+ unsigned Variable = PhiInserter.AddVariable("", Boolean);
+ PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent,
+ Default);
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
@@ -697,7 +698,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
ParentInfo = PI;
break;
}
- PhiInserter.AddAvailableValue(BB, PI.Pred);
+ PhiInserter.AddAvailableValue(Variable, BB, PI.Pred);
Dominator.addAndRememberBlock(BB);
}
@@ -706,9 +707,9 @@ void StructurizeCFG::insertConditions(bool Loops) {
CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
} else {
if (!Dominator.resultIsRememberedBlock())
- PhiInserter.AddAvailableValue(Dominator.result(), Default);
+ PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default);
- Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
+ PhiInserter.AddUse(Variable, &Term->getOperandUse(0));
}
}
}
@@ -1414,8 +1415,12 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
orderNodes();
collectInfos();
createFlow();
- insertConditions(false);
- insertConditions(true);
+
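+  // Batch all condition updates through one SSAUpdaterBulk so the
+  // dominator-tree-driven PHI insertion runs once rather than per branch.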
+ SSAUpdaterBulk PhiInserter;
+ insertConditions(false, PhiInserter);
+ insertConditions(true, PhiInserter);
+ PhiInserter.RewriteAndOptimizeAllUses(*DT);
+
setPhiValues();
simplifyHoistedPhis();
simplifyConditions();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8ca3bed..23f5623 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -24,12 +24,9 @@
#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
-#include "VPlanAnalysis.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
@@ -41,10 +38,11 @@
#include "llvm/IR/Operator.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/InstructionCost.h"
-#include <algorithm>
#include <cassert>
#include <cstddef>
+#include <functional>
#include <string>
+#include <utility>
namespace llvm {
@@ -346,13 +344,6 @@ public:
/// Return the cost of the block.
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
- /// Return true if it is legal to hoist instructions into this block.
- bool isLegalToHoistInto() {
- // There are currently no constraints that prevent an instruction to be
- // hoisted into a VPBlockBase.
- return true;
- }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
OS << getName();
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
index acac2c9..0c1f37b 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
@@ -382,7 +382,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64
; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
; X64-NEXT: %i10 = sub i64 %i9, %i4
-; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
+; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable }
; X64-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -393,7 +393,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable }
; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char
; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
-; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1
+; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2
; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
; X64-NEXT: Loop %bb6: Trip multiple is 1
;
@@ -406,9 +406,9 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
; X32-NEXT: %i8 = load i8, ptr %i7, align 1
; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64
-; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
+; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
; X32-NEXT: %i10 = sub i64 %i9, %i4
-; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
+; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
; X32-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -419,7 +419,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) {
; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable }
; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char
; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
-; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1
+; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2
; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
; X32-NEXT: Loop %bb6: Trip multiple is 1
;
@@ -459,7 +459,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64
; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
; X64-NEXT: %i10 = sub i64 %i9, %i4
-; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
+; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable }
; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable }
; X64-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -470,7 +470,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable }
; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped
; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
-; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1
+; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2
; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64))
; X64-NEXT: Loop %bb6: Trip multiple is 1
;
@@ -483,9 +483,9 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
; X32-NEXT: %i8 = load i8, ptr %i7, align 1
; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64
-; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
+; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable }
; X32-NEXT: %i10 = sub i64 %i9, %i4
-; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
+; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable }
; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10
; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable }
; X32-NEXT: %i12 = load i8, ptr %i11, align 1
@@ -496,7 +496,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) {
; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable }
; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped
; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
-; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1
+; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2
; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32))
; X32-NEXT: Loop %bb6: Trip multiple is 1
;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir
new file mode 100644
index 0000000..824ada1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir
@@ -0,0 +1,278 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -passes="print<gisel-value-tracking>" -filetype=null %s 2>&1 | FileCheck %s
+
+---
+name: Cst
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @Cst
+ ; CHECK-NEXT: %0:_ KnownBits:00000010 SignBits:6
+ ; CHECK-NEXT: %1:_ KnownBits:00011000 SignBits:3
+ ; CHECK-NEXT: %2:_ KnownBits:00011010 SignBits:3
+ %0:_(s8) = G_CONSTANT i8 2
+ %1:_(s8) = G_CONSTANT i8 24
+ %2:_(s8) = G_ADD %0, %1
+...
+---
+name: CstZero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CstZero
+ ; CHECK-NEXT: %0:_ KnownBits:00000001 SignBits:7
+ ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8
+ ; CHECK-NEXT: %2:_ KnownBits:00000000 SignBits:8
+ %0:_(s8) = G_CONSTANT i8 1
+ %1:_(s8) = G_CONSTANT i8 255
+ %2:_(s8) = G_ADD %0, %1
+...
+---
+name: CstNegOne
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CstNegOne
+ ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8
+ ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8
+ ; CHECK-NEXT: %2:_ KnownBits:11111111 SignBits:8
+ %0:_(s8) = G_CONSTANT i8 0
+ %1:_(s8) = G_CONSTANT i8 255
+ %2:_(s8) = G_ADD %0, %1
+...
+---
+name: CstSeven
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CstSeven
+ ; CHECK-NEXT: %0:_ KnownBits:00001000 SignBits:4
+ ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8
+ ; CHECK-NEXT: %2:_ KnownBits:00000111 SignBits:5
+ %0:_(s8) = G_CONSTANT i8 8
+ %1:_(s8) = G_CONSTANT i8 255
+ %2:_(s8) = G_ADD %0, %1
+...
+---
+name: CstNeg
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CstNeg
+ ; CHECK-NEXT: %0:_ KnownBits:11100000 SignBits:3
+ ; CHECK-NEXT: %1:_ KnownBits:00000010 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:11100010 SignBits:3
+ %0:_(s8) = G_CONSTANT i8 224
+ %1:_(s8) = G_CONSTANT i8 2
+ %2:_(s8) = G_ADD %0, %1
+...
+---
+name: ScalarVar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ScalarVar
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = COPY $b1
+ %2:_(s8) = G_ADD %0, %1
+...
+---
+name: ScalarRhsEarlyOut
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ScalarRhsEarlyOut
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 3
+ %2:_(s8) = G_ADD %0, %1
+...
+---
+name: ScalarNonNegative
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ScalarNonNegative
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4
+ ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4
+ ; CHECK-NEXT: %3:_ KnownBits:11111111 SignBits:8
+ ; CHECK-NEXT: %4:_ KnownBits:???????? SignBits:4
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 15
+ %2:_(s8) = G_AND %0, %1
+ %3:_(s8) = G_CONSTANT i8 255
+ %4:_(s8) = G_ADD %2, %3
+...
+---
+name: ScalarLhsEarlyOut
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ScalarLhsEarlyOut
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 3
+ %2:_(s8) = G_ADD %1, %0
+...
+---
+name: ScalarPartKnown
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ScalarPartKnown
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4
+ ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4
+ ; CHECK-NEXT: %3:_ KnownBits:00000101 SignBits:5
+ ; CHECK-NEXT: %4:_ KnownBits:000????? SignBits:3
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 15
+ %2:_(s8) = G_AND %0, %1
+ %3:_(s8) = G_CONSTANT i8 5
+ %4:_(s8) = G_ADD %2, %3
+...
+---
+name: VectorCstZero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCstZero
+ ; CHECK-NEXT: %0:_ KnownBits:0000000000000001 SignBits:15
+ ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000001 SignBits:15
+ ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16
+ ; CHECK-NEXT: %4:_ KnownBits:0000000000000000 SignBits:16
+ %0:_(s16) = G_CONSTANT i16 1
+ %1:_(s16) = G_CONSTANT i16 65535
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %4:_(<4 x s16>) = G_ADD %2, %3
+...
+---
+name: VectorCstNegOne
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCstNegOne
+ ; CHECK-NEXT: %0:_ KnownBits:0000000000000000 SignBits:16
+ ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000000 SignBits:16
+ ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16
+ ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16
+ %0:_(s16) = G_CONSTANT i16 0
+ %1:_(s16) = G_CONSTANT i16 65535
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %4:_(<4 x s16>) = G_ADD %2, %3
+...
+---
+name: VectorVar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorVar
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:1
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = COPY $d1
+ %2:_(<4 x s16>) = G_ADD %0, %1
+...
+---
+name: VectorRhsEarlyOut
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorRhsEarlyOut
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(s16) = G_CONSTANT i16 3
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %3:_(<4 x s16>) = G_ADD %2, %0
+...
+---
+name: VectorNonNegative
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorNonNegative
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8
+ ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8
+ ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8
+ ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16
+ ; CHECK-NEXT: %5:_ KnownBits:1111111111111111 SignBits:16
+ ; CHECK-NEXT: %6:_ KnownBits:???????????????? SignBits:8
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(s16) = G_CONSTANT i16 255
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %3:_(<4 x s16>) = G_AND %0, %2
+ %4:_(s16) = G_CONSTANT i16 65535
+ %5:_(<4 x s16>) = G_BUILD_VECTOR %4, %4, %4, %4
+ %6:_(<4 x s16>) = G_ADD %3, %5
+...
+---
+name: VectorLhsEarlyOut
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorLhsEarlyOut
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(s16) = G_CONSTANT i16 3
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %3:_(<4 x s16>) = G_ADD %0, %2
+...
+---
+name: VectorPartKnown
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorPartKnown
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8
+ ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8
+ ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8
+ ; CHECK-NEXT: %4:_ KnownBits:0000000000101010 SignBits:10
+ ; CHECK-NEXT: %5:_ KnownBits:0000000001001010 SignBits:9
+ ; CHECK-NEXT: %6:_ KnownBits:000000000??01010 SignBits:9
+ ; CHECK-NEXT: %7:_ KnownBits:0000000????????? SignBits:7
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(s16) = G_CONSTANT i16 255
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %3:_(<4 x s16>) = G_AND %0, %2
+ %4:_(s16) = G_CONSTANT i16 42
+ %5:_(s16) = G_CONSTANT i16 74
+ %6:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %5, %4
+ %7:_(<4 x s16>) = G_ADD %6, %3
+...
+---
+name: VectorCst36
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCst36
+ ; CHECK-NEXT: %0:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000110 SignBits:13
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000?1? SignBits:13
+ ; CHECK-NEXT: %3:_ KnownBits:0000000000000?1? SignBits:13
+ ; CHECK-NEXT: %4:_ KnownBits:000000000000???? SignBits:12
+ %0:_(s16) = G_CONSTANT i16 3
+ %1:_(s16) = G_CONSTANT i16 6
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0
+ %4:_(<4 x s16>) = G_ADD %2, %3
+...
+
+---
+name: VectorCst3unknown
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCst3unknown
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(s16) = COPY $h0
+ %2:_(s16) = G_CONSTANT i16 3
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1
+ %4:_(<4 x s16>) = G_ADD %0, %3
+...
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index dc88f94..cca190f 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1774,3 +1774,88 @@ define i128 @combine_i128_sdiv_const100(i128 %x) {
%1 = sdiv i128 %x, 100
ret i128 %1
}
+
+; The following only becomes an sdiv_by_one after type legalisation, after which
+; the splatted scalar constant has a different type to the splat vector. This
+; test verifies DAGCombiner does not care about this type difference.
+define <16 x i16> @combine_vec_sdiv_by_one_obfuscated(<16 x i16> %x) "target-features"="+sve" {
+; CHECK-SD-LABEL: combine_vec_sdiv_by_one_obfuscated:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: combine_vec_sdiv_by_one_obfuscated:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: movi v3.8h, #1
+; CHECK-GI-NEXT: smov w8, v0.h[0]
+; CHECK-GI-NEXT: mov v3.h[0], v2.h[0]
+; CHECK-GI-NEXT: smov w9, v3.h[0]
+; CHECK-GI-NEXT: smov w16, v3.h[7]
+; CHECK-GI-NEXT: sdiv w14, w8, w9
+; CHECK-GI-NEXT: smov w8, v0.h[1]
+; CHECK-GI-NEXT: smov w9, v3.h[1]
+; CHECK-GI-NEXT: sdiv w15, w8, w9
+; CHECK-GI-NEXT: smov w8, v0.h[2]
+; CHECK-GI-NEXT: smov w9, v3.h[2]
+; CHECK-GI-NEXT: sdiv w13, w8, w9
+; CHECK-GI-NEXT: smov w8, v0.h[3]
+; CHECK-GI-NEXT: smov w9, v3.h[3]
+; CHECK-GI-NEXT: sdiv w12, w8, w9
+; CHECK-GI-NEXT: smov w8, v0.h[4]
+; CHECK-GI-NEXT: smov w9, v3.h[4]
+; CHECK-GI-NEXT: sdiv w11, w8, w9
+; CHECK-GI-NEXT: smov w8, v0.h[5]
+; CHECK-GI-NEXT: smov w9, v3.h[5]
+; CHECK-GI-NEXT: sdiv w10, w8, w9
+; CHECK-GI-NEXT: smov w8, v0.h[6]
+; CHECK-GI-NEXT: smov w9, v3.h[6]
+; CHECK-GI-NEXT: movi v3.8h, #1
+; CHECK-GI-NEXT: smov w17, v3.h[0]
+; CHECK-GI-NEXT: smov w18, v3.h[1]
+; CHECK-GI-NEXT: smov w0, v3.h[2]
+; CHECK-GI-NEXT: smov w1, v3.h[3]
+; CHECK-GI-NEXT: smov w2, v3.h[4]
+; CHECK-GI-NEXT: smov w3, v3.h[5]
+; CHECK-GI-NEXT: sdiv w8, w8, w9
+; CHECK-GI-NEXT: smov w9, v0.h[7]
+; CHECK-GI-NEXT: fmov s0, w14
+; CHECK-GI-NEXT: mov v0.h[1], w15
+; CHECK-GI-NEXT: smov w15, v1.h[6]
+; CHECK-GI-NEXT: mov v0.h[2], w13
+; CHECK-GI-NEXT: sdiv w9, w9, w16
+; CHECK-GI-NEXT: smov w16, v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], w12
+; CHECK-GI-NEXT: smov w12, v1.h[7]
+; CHECK-GI-NEXT: mov v0.h[4], w11
+; CHECK-GI-NEXT: sdiv w16, w16, w17
+; CHECK-GI-NEXT: smov w17, v1.h[1]
+; CHECK-GI-NEXT: mov v0.h[5], w10
+; CHECK-GI-NEXT: mov v0.h[6], w8
+; CHECK-GI-NEXT: sdiv w17, w17, w18
+; CHECK-GI-NEXT: smov w18, v1.h[2]
+; CHECK-GI-NEXT: fmov s2, w16
+; CHECK-GI-NEXT: smov w16, v3.h[6]
+; CHECK-GI-NEXT: mov v0.h[7], w9
+; CHECK-GI-NEXT: sdiv w18, w18, w0
+; CHECK-GI-NEXT: smov w0, v1.h[3]
+; CHECK-GI-NEXT: mov v2.h[1], w17
+; CHECK-GI-NEXT: sdiv w0, w0, w1
+; CHECK-GI-NEXT: smov w1, v1.h[4]
+; CHECK-GI-NEXT: mov v2.h[2], w18
+; CHECK-GI-NEXT: sdiv w1, w1, w2
+; CHECK-GI-NEXT: smov w2, v1.h[5]
+; CHECK-GI-NEXT: mov v2.h[3], w0
+; CHECK-GI-NEXT: sdiv w14, w2, w3
+; CHECK-GI-NEXT: mov v2.h[4], w1
+; CHECK-GI-NEXT: sdiv w13, w15, w16
+; CHECK-GI-NEXT: smov w15, v3.h[7]
+; CHECK-GI-NEXT: mov v2.h[5], w14
+; CHECK-GI-NEXT: sdiv w10, w12, w15
+; CHECK-GI-NEXT: mov v2.h[6], w13
+; CHECK-GI-NEXT: mov v2.h[7], w10
+; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
+ %zero_and_ones = shufflevector <16 x i16> zeroinitializer, <16 x i16> splat (i16 1), <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %div = sdiv <16 x i16> %x, %zero_and_ones
+ ret <16 x i16> %div
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index b6dee97e..b8d6c88 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -732,6 +732,247 @@ exit:
ret void
}
+; This example corresponds to:
+;
+; __arm_agnostic("sme_za_state") void try_catch_agnostic_za_invoke()
+; {
+; try {
+; agnostic_za_call();
+; } catch(...) {
+; }
+; }
+;
+; In this example, all SME state enabled by PSTATE.ZA is preserved with
+; `__arm_sme_save` before agnostic_za_call(), because an agnostic-ZA function
+; must preserve ZA state on every normal return. That means ZA state must be
+; saved in case agnostic_za_call() throws, and restored after unwinding to
+; the catch block.
+
+define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: try_catch_agnostic_za_invoke:
+; CHECK: .Lfunc_begin5:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
+; CHECK-NEXT: .cfi_lsda 28, .Lexception5
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: .cfi_def_cfa w29, 32
+; CHECK-NEXT: .cfi_offset w19, -16
+; CHECK-NEXT: .cfi_offset w30, -24
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: bl __arm_sme_state_size
+; CHECK-NEXT: sub sp, sp, x0
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: .Ltmp15: // EH_LABEL
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: bl __arm_sme_save
+; CHECK-NEXT: bl agnostic_za_call
+; CHECK-NEXT: .Ltmp16: // EH_LABEL
+; CHECK-NEXT: .LBB5_1: // %exit
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: bl __arm_sme_restore
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB5_2: // %catch
+; CHECK-NEXT: .Ltmp17: // EH_LABEL
+; CHECK-NEXT: bl __cxa_begin_catch
+; CHECK-NEXT: bl __cxa_end_catch
+; CHECK-NEXT: b .LBB5_1
+;
+; CHECK-SDAG-LABEL: try_catch_agnostic_za_invoke:
+; CHECK-SDAG: .Lfunc_begin5:
+; CHECK-SDAG-NEXT: .cfi_startproc
+; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
+; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception5
+; CHECK-SDAG-NEXT: // %bb.0: // %entry
+; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-SDAG-NEXT: mov x29, sp
+; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32
+; CHECK-SDAG-NEXT: .cfi_offset w19, -16
+; CHECK-SDAG-NEXT: .cfi_offset w30, -24
+; CHECK-SDAG-NEXT: .cfi_offset w29, -32
+; CHECK-SDAG-NEXT: bl __arm_sme_state_size
+; CHECK-SDAG-NEXT: sub sp, sp, x0
+; CHECK-SDAG-NEXT: mov x19, sp
+; CHECK-SDAG-NEXT: .Ltmp15: // EH_LABEL
+; CHECK-SDAG-NEXT: mov x0, x19
+; CHECK-SDAG-NEXT: bl __arm_sme_save
+; CHECK-SDAG-NEXT: bl agnostic_za_call
+; CHECK-SDAG-NEXT: mov x0, x19
+; CHECK-SDAG-NEXT: bl __arm_sme_restore
+; CHECK-SDAG-NEXT: .Ltmp16: // EH_LABEL
+; CHECK-SDAG-NEXT: .LBB5_1: // %exit
+; CHECK-SDAG-NEXT: mov sp, x29
+; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-SDAG-NEXT: ret
+; CHECK-SDAG-NEXT: .LBB5_2: // %catch
+; CHECK-SDAG-NEXT: .Ltmp17: // EH_LABEL
+; CHECK-SDAG-NEXT: mov x1, x0
+; CHECK-SDAG-NEXT: mov x0, x19
+; CHECK-SDAG-NEXT: bl __arm_sme_restore
+; CHECK-SDAG-NEXT: mov x0, x19
+; CHECK-SDAG-NEXT: bl __arm_sme_save
+; CHECK-SDAG-NEXT: mov x0, x1
+; CHECK-SDAG-NEXT: bl __cxa_begin_catch
+; CHECK-SDAG-NEXT: mov x0, x19
+; CHECK-SDAG-NEXT: bl __arm_sme_restore
+; CHECK-SDAG-NEXT: mov x0, x19
+; CHECK-SDAG-NEXT: bl __arm_sme_save
+; CHECK-SDAG-NEXT: bl __cxa_end_catch
+; CHECK-SDAG-NEXT: mov x0, x19
+; CHECK-SDAG-NEXT: bl __arm_sme_restore
+; CHECK-SDAG-NEXT: b .LBB5_1
+entry:
+ invoke void @agnostic_za_call()
+ to label %exit unwind label %catch
+
+catch:
+ %eh_info = landingpad { ptr, i32 }
+ catch ptr null
+ %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0
+ tail call ptr @__cxa_begin_catch(ptr %exception_ptr)
+ tail call void @__cxa_end_catch()
+ br label %exit
+
+exit:
+ ret void
+}
+
+; This is the same as `try_catch_agnostic_za_invoke`, but shows that a lazy
+; save would also need to be committed in a shared-ZA function calling an
+; agnostic-ZA function.
+define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: try_catch_inout_za_agnostic_za_callee:
+; CHECK: .Lfunc_begin6:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
+; CHECK-NEXT: .cfi_lsda 28, .Lexception6
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: msub x9, x8, x8, x9
+; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-NEXT: .Ltmp18: // EH_LABEL
+; CHECK-NEXT: sub x8, x29, #16
+; CHECK-NEXT: msr TPIDR2_EL0, x8
+; CHECK-NEXT: bl agnostic_za_call
+; CHECK-NEXT: .Ltmp19: // EH_LABEL
+; CHECK-NEXT: .LBB6_1: // %exit
+; CHECK-NEXT: smstart za
+; CHECK-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-NEXT: sub x0, x29, #16
+; CHECK-NEXT: cbnz x8, .LBB6_3
+; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: bl __arm_tpidr2_restore
+; CHECK-NEXT: .LBB6_3: // %exit
+; CHECK-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB6_4: // %catch
+; CHECK-NEXT: .Ltmp20: // EH_LABEL
+; CHECK-NEXT: bl __cxa_begin_catch
+; CHECK-NEXT: bl __cxa_end_catch
+; CHECK-NEXT: b .LBB6_1
+;
+; CHECK-SDAG-LABEL: try_catch_inout_za_agnostic_za_callee:
+; CHECK-SDAG: .Lfunc_begin6:
+; CHECK-SDAG-NEXT: .cfi_startproc
+; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0
+; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception6
+; CHECK-SDAG-NEXT: // %bb.0: // %entry
+; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
+; CHECK-SDAG-NEXT: mov x29, sp
+; CHECK-SDAG-NEXT: sub sp, sp, #16
+; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32
+; CHECK-SDAG-NEXT: .cfi_offset w19, -16
+; CHECK-SDAG-NEXT: .cfi_offset w30, -24
+; CHECK-SDAG-NEXT: .cfi_offset w29, -32
+; CHECK-SDAG-NEXT: rdsvl x8, #1
+; CHECK-SDAG-NEXT: mov x9, sp
+; CHECK-SDAG-NEXT: msub x9, x8, x8, x9
+; CHECK-SDAG-NEXT: mov sp, x9
+; CHECK-SDAG-NEXT: stp x9, x8, [x29, #-16]
+; CHECK-SDAG-NEXT: .Ltmp18: // EH_LABEL
+; CHECK-SDAG-NEXT: sub x19, x29, #16
+; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19
+; CHECK-SDAG-NEXT: bl agnostic_za_call
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-SDAG-NEXT: sub x0, x29, #16
+; CHECK-SDAG-NEXT: cbnz x8, .LBB6_2
+; CHECK-SDAG-NEXT: // %bb.1: // %entry
+; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
+; CHECK-SDAG-NEXT: .LBB6_2: // %entry
+; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-SDAG-NEXT: .Ltmp19: // EH_LABEL
+; CHECK-SDAG-NEXT: .LBB6_3: // %exit
+; CHECK-SDAG-NEXT: mov sp, x29
+; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
+; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-SDAG-NEXT: ret
+; CHECK-SDAG-NEXT: .LBB6_4: // %catch
+; CHECK-SDAG-NEXT: .Ltmp20: // EH_LABEL
+; CHECK-SDAG-NEXT: mov x1, x0
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-SDAG-NEXT: sub x0, x29, #16
+; CHECK-SDAG-NEXT: cbnz x8, .LBB6_6
+; CHECK-SDAG-NEXT: // %bb.5: // %catch
+; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
+; CHECK-SDAG-NEXT: .LBB6_6: // %catch
+; CHECK-SDAG-NEXT: mov x0, x1
+; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19
+; CHECK-SDAG-NEXT: bl __cxa_begin_catch
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-SDAG-NEXT: sub x0, x29, #16
+; CHECK-SDAG-NEXT: cbnz x8, .LBB6_8
+; CHECK-SDAG-NEXT: // %bb.7: // %catch
+; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
+; CHECK-SDAG-NEXT: .LBB6_8: // %catch
+; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19
+; CHECK-SDAG-NEXT: bl __cxa_end_catch
+; CHECK-SDAG-NEXT: smstart za
+; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0
+; CHECK-SDAG-NEXT: sub x0, x29, #16
+; CHECK-SDAG-NEXT: cbnz x8, .LBB6_10
+; CHECK-SDAG-NEXT: // %bb.9: // %catch
+; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore
+; CHECK-SDAG-NEXT: .LBB6_10: // %catch
+; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr
+; CHECK-SDAG-NEXT: b .LBB6_3
+entry:
+ invoke void @agnostic_za_call()
+ to label %exit unwind label %catch
+
+catch:
+ %eh_info = landingpad { ptr, i32 }
+ catch ptr null
+ %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0
+ tail call ptr @__cxa_begin_catch(ptr %exception_ptr)
+ tail call void @__cxa_end_catch()
+ br label %exit
+
+exit:
+ ret void
+}
+
declare ptr @__cxa_allocate_exception(i64)
declare void @__cxa_throw(ptr, ptr, ptr)
declare ptr @__cxa_begin_catch(ptr)
@@ -742,3 +983,4 @@ declare void @may_throw()
declare void @shared_za_call() "aarch64_inout_za"
declare void @noexcept_shared_za_call() "aarch64_inout_za"
declare void @shared_zt0_call() "aarch64_inout_zt0"
+declare void @agnostic_za_call() "aarch64_za_state_agnostic"
diff --git a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll
index c6ee804..07fbed9 100644
--- a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll
@@ -90,7 +90,7 @@ define i32 @test_tbegin_nofloat4(i32 %pad, ptr %ptr) {
; CHECK: tbegin 0, 65292
; CHECK: ipm %r2
; CHECK: srl %r2, 28
-; CHECK: ciblh %r2, 2, 0(%r14)
+; CHECK: bnhr %r14
; CHECK: mvhi 0(%r3), 0
; CHECK: br %r14
%res = call i32 @llvm.s390.tbegin.nofloat(ptr null, i32 65292)
@@ -219,7 +219,7 @@ define i32 @test_tend2(i32 %pad, ptr %ptr) {
; CHECK: tend
; CHECK: ipm %r2
; CHECK: srl %r2, 28
-; CHECK: ciblh %r2, 2, 0(%r14)
+; CHECK: bnhr %r14
; CHECK: mvhi 0(%r3), 0
; CHECK: br %r14
%res = call i32 @llvm.s390.tend()
diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll
new file mode 100644
index 0000000..6b8746e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll
@@ -0,0 +1,738 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s
+; Test combining br_ccmask for the flag output operand and optimizing the
+; ipm sequence using conditional branches.
+
+declare void @dummy()
+
+; Check a case where the cc is used as an integer.
+; Just (srl (ipm)) sequence without optimization.
+define i32 @test(ptr %a) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0:
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ipm %r2
+; CHECK-NEXT: srl %r2, 28
+; CHECK-NEXT: br %r14
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ ret i32 %cc
+}
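+
+; A short sketch (not part of the autogenerated checks) of what the two
+; instructions above do, assuming the usual z/Architecture semantics:
+;
+;   ipm %r2        ; CC goes into bits 28-29 of %r2, the program mask into
+;                  ; bits 24-27; bits 30-31 are zeroed
+;   srl %r2, 28    ; shift right by 28, leaving just CC as an i32 in [0, 3]
+;
+; The tests below check that this ipm+srl pair is folded away whenever the
+; CC value only feeds a branch.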
+
+; Test-1(f1_0_*). Test all 14 valid combinations where cc is used for
+; branching.
+
+; Check (cc == 0).
+define void @f1_0_eq_0(ptr %a) {
+; CHECK-LABEL: f1_0_eq_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jge dummy@PLT
+; CHECK-NEXT: .LBB1_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp eq i32 %cc, 0
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
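+
+; For reference in the checks below (assuming the standard BRCL extended
+; mnemonics): the condition-mask letters map onto CC values as
+;   e <-> CC0,  l <-> CC1,  h <-> CC2,  o <-> CC3
+; so, for example, 'jglh' branches on CC 1|2 and 'jgnlh' on CC 0|3; each of
+; the 14 valid masks has such a single-branch encoding, which is what lets
+; every test here fold into one conditional tail call.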
+
+; Check (cc != 0).
+define void @f1_0_ne_0(ptr %a) {
+; CHECK-LABEL: f1_0_ne_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgne dummy@PLT
+; CHECK-NEXT: .LBB2_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp ugt i32 %cc, 0
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 1).
+define void @f1_0_eq_1(ptr %a) {
+; CHECK-LABEL: f1_0_eq_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgl dummy@PLT
+; CHECK-NEXT: .LBB3_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp eq i32 %cc, 1
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc != 1).
+define void @f1_0_ne_1(ptr %a) {
+; CHECK-LABEL: f1_0_ne_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnl dummy@PLT
+; CHECK-NEXT: .LBB4_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp ne i32 %cc, 1
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 2).
+define void @f1_0_eq_2(ptr %a) {
+; CHECK-LABEL: f1_0_eq_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgh dummy@PLT
+; CHECK-NEXT: .LBB5_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp eq i32 %cc, 2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc != 2).
+define void @f1_0_ne_2(ptr %a) {
+; CHECK-LABEL: f1_0_ne_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnh dummy@PLT
+; CHECK-NEXT: .LBB6_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp ne i32 %cc, 2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 3).
+define void @f1_0_eq_3(ptr %a) {
+; CHECK-LABEL: f1_0_eq_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgo dummy@PLT
+; CHECK-NEXT: .LBB7_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp eq i32 %cc, 3
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc != 3).
+define void @f1_0_ne_3(ptr %a) {
+; CHECK-LABEL: f1_0_ne_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgno dummy@PLT
+; CHECK-NEXT: .LBB8_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp ult i32 %cc, 3
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 0|1).
+define void @f1_0_01(ptr %a) {
+; CHECK-LABEL: f1_0_01:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgle dummy@PLT
+; CHECK-NEXT: .LBB9_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp ult i32 %cc, 2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 0|2).
+define void @f1_0_02(ptr %a) {
+; CHECK-LABEL: f1_0_02:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jghe dummy@PLT
+; CHECK-NEXT: .LBB10_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 0|3).
+define void @f1_0_03(ptr %a) {
+; CHECK-LABEL: f1_0_03:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnlh dummy@PLT
+; CHECK-NEXT: .LBB11_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp0 = icmp ne i32 %cc, 0
+ %cmp3 = icmp ne i32 %cc, 3
+ %cmp.inv = and i1 %cmp0, %cmp3
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 1|2).
+define void @f1_0_12(ptr %a) {
+; CHECK-LABEL: f1_0_12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jglh dummy@PLT
+; CHECK-NEXT: .LBB12_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq1 = icmp eq i32 %cc, 1
+ %cmpeq2 = icmp eq i32 %cc, 2
+ %cmp = or i1 %cmpeq1, %cmpeq2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 1|3).
+define void @f1_0_13(ptr %a) {
+; CHECK-LABEL: f1_0_13:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnhe dummy@PLT
+; CHECK-NEXT: .LBB13_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq1 = icmp eq i32 %cc, 1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cmp = or i1 %cmpeq1, %cmpeq3
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 2|3).
+define void @f1_0_23(ptr %a) {
+; CHECK-LABEL: f1_0_23:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnle dummy@PLT
+; CHECK-NEXT: .LBB14_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp ugt i32 %cc, 1
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Test-2(f1_1_*/f1_2_*/f1_3_*/f1_4_*).
+; Test mixed patterns involving Binary Ops.
+
+; Check 'add' for (cc != 0).
+define void @f1_1_1(ptr %a) {
+; CHECK-LABEL: f1_1_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgne dummy@PLT
+; CHECK-NEXT: .LBB15_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cmp = icmp ult i32 %add, 3
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
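+
+; A worked sketch of why the 'add' form above folds: with cc known to be in
+; [0, 3], the unsigned values of (cc - 1) are
+;   0 -> 0xffffffff, 1 -> 0, 2 -> 1, 3 -> 2
+; so ((cc - 1) ult 3) holds exactly for cc in {1, 2, 3}, i.e. (cc != 0),
+; and the branch collapses to the single 'ne' mask seen in the checks.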
+
+; Check 'add' for (cc == 1|2).
+define void @f1_1_2(ptr %a) {
+; CHECK-LABEL: f1_1_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jglh dummy@PLT
+; CHECK-NEXT: .LBB16_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cmp = icmp ult i32 %add, 2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'add' for (cc == 1|2).
+define void @f1_1_3(ptr %a) {
+; CHECK-LABEL: f1_1_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jglh dummy@PLT
+; CHECK-NEXT: .LBB17_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -3
+ %cmp.inv = icmp ult i32 %add, -2
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'and' with one operand cc and the other select_ccmask(cc != 1).
+define void @f1_2_1(ptr %a) {
+; CHECK-LABEL: f1_2_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnl dummy@PLT
+; CHECK-NEXT: .LBB18_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpne0 = icmp ne i32 %andcc, 0
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cmp.inv = and i1 %cmpne3, %cmpne0
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'and' with both operands select_ccmask(cc != 2).
+define void @f1_2_2(ptr %a) {
+; CHECK-LABEL: f1_2_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnh dummy@PLT
+; CHECK-NEXT: .LBB19_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %ugt1 = icmp samesign ugt i32 %cc, 1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %and.cond.inv = and i1 %ugt1, %cmpne3
+ br i1 %and.cond.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'and/tm' for (cc == 0|2).
+define void @f1_2_3(ptr %a) {
+; CHECK-LABEL: f1_2_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jghe dummy@PLT
+; CHECK-NEXT: .LBB20_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
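+
+; Sketch of the parity trick above: (cc & 1) == 0 holds exactly for cc in
+; {0, 2}, which is the CC0|CC2 ("he") mask; the inverted test in the next
+; function selects cc in {1, 3} ("nhe") the same way.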
+
+; Check 'and/tm' for (cc == 1|3).
+define void @f1_2_4(ptr %a) {
+; CHECK-LABEL: f1_2_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnhe dummy@PLT
+; CHECK-NEXT: .LBB21_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'icmp' with one operand 'and' and the other 'select_ccmask'(cc != 1).
+define void @f1_2_5(ptr %a) {
+; CHECK-LABEL: f1_2_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnl dummy@PLT
+; CHECK-NEXT: .LBB22_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cmp = xor i1 %cmpne3, %trunc
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check nested 'xor' cc with select_ccmask(cc != 1).
+define void @f1_3_1(ptr %a) {
+; CHECK-LABEL: f1_3_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnl dummy@PLT
+; CHECK-NEXT: .LBB23_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq0 = icmp eq i32 %cc, 0
+ %cmpeq2 = icmp eq i32 %cc, 2
+ %xor = xor i1 %cmpeq0, %cmpeq2
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cmp.inv = xor i1 %cmpne3, %xor
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 1).
+define void @f1_3_2(ptr %a) {
+; CHECK-LABEL: f1_3_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnl dummy@PLT
+; CHECK-NEXT: .LBB24_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cmp.inv = xor i1 %cmpeq3, %trunc
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 2).
+define void @f1_3_3(ptr %a) {
+; CHECK-LABEL: f1_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnh dummy@PLT
+; CHECK-NEXT: .LBB25_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cmp.cond.inv = xor i1 %cmpne0, %trunc
+ br i1 %cmp.cond.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' with both operands select_ccmask, one with TM and the other
+; with ICMP(cc == 1).
+define void @f1_4_1(ptr %a) {
+; CHECK-LABEL: f1_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgl dummy@PLT
+; CHECK-NEXT: .LBB26_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cmp.cond.inv = or i1 %cmpeq3, %cmpeq0
+ br i1 %cmp.cond.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' for (cc == 0|1).
+define void @f1_4_2(ptr %a) {
+; CHECK-LABEL: f1_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgle dummy@PLT
+; CHECK-NEXT: .LBB27_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cmp.inv = icmp samesign ugt i32 %or, -3
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
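+
+; Sketch of the 'or disjoint' encoding used above and below: with cc in
+; [0, 3], (cc | -4) maps 0 -> -4, 1 -> -3, 2 -> -2, 3 -> -1, so a single
+; unsigned compare against -3 or -2 partitions {0, 1} from {2, 3}.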
+
+; Check 'or' for (cc == 0|1).
+define void @f1_4_3(ptr %a) {
+; CHECK-LABEL: f1_4_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgle dummy@PLT
+; CHECK-NEXT: .LBB28_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cmp = icmp samesign ult i32 %or, -2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll
new file mode 100644
index 0000000..b9b9a4b
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll
@@ -0,0 +1,1665 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s
+; Test combining select_ccmask for the flag output operand and optimizing
+; the ipm sequence using conditional branches.
+
+; Test-1(f2_0_*): Both TrueVal and FalseVal non-const (all 14 valid CC masks).
+
+; Check (cc == 0).
+define i64 @f2_0_eq_0(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_eq_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: ber %r14
+; CHECK-NEXT: .LBB0_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp eq i32 %cc, 0
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
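+
+; Note on the lowering pattern in this file (as the checks above show): the
+; select is combined into a conditional return, so %x, already in %r2, is
+; returned directly when the CC matches ('ber' here), and a fall-through
+; 'lgr' copies %y into %r2 otherwise.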
+
+; Check (cc != 0).
+define i64 @f2_0_ne_0(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_ne_0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bner %r14
+; CHECK-NEXT: .LBB1_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp ugt i32 %cc, 0
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc == 1).
+define i64 @f2_0_eq_1(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_eq_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB2_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp eq i32 %cc, 1
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc != 1).
+define i64 @f2_0_ne_1(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_ne_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB3_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp ne i32 %cc, 1
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc == 2).
+define i64 @f2_0_eq_2(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_eq_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB4_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp eq i32 %cc, 2
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc != 2).
+define i64 @f2_0_ne_2(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_ne_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnhr %r14
+; CHECK-NEXT: .LBB5_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp ne i32 %cc, 2
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc == 3).
+define i64 @f2_0_eq_3(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_eq_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bor %r14
+; CHECK-NEXT: .LBB6_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp eq i32 %cc, 3
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc != 3).
+define i64 @f2_0_ne_3(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_ne_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnor %r14
+; CHECK-NEXT: .LBB7_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp ult i32 %cc, 3
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc == 0|1).
+define i64 @f2_0_01(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_01:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB8_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp ult i32 %cc, 2
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc == 0|2).
+define i64 @f2_0_02(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_02:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB9_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %and = and i32 %cc, 1
+ %cond = icmp eq i32 %and, 0
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check (cc == 0|3).
+define i64 @f2_0_03(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_0_03:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blhr %r14
+; CHECK-NEXT: .LBB10_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cmp0 = icmp ne i32 %cc, 0
+ %cmp3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %cmp0, %cmp3
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check (cc == 1|2).
+define i64 @f2_0_12(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_0_12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlhr %r14
+; CHECK-NEXT: .LBB11_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %add = add nsw i32 %cc, -3
+ %cond.inv = icmp ult i32 %add, -2
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check (cc == 1|3).
+define i64 @f2_0_13(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_0_13:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB12_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %and = and i32 %cc, 1
+ %cond.inv = icmp eq i32 %and, 0
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check (cc == 2|3).
+define i64 @f2_0_23(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_0_23:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB13_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %cond = icmp ugt i32 %cc, 1
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Test-2(f2_1_*/f2_2_*/f2_3_*/f2_4_*).
+; Both TrueVal and FalseVal are non-const with mixed patterns involving
+; Binary Ops.
+
+; Check 'add' for (cc != 0).
+define i64 @f2_1_1(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_1_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bner %r14
+; CHECK-NEXT: .LBB14_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 3
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check 'add' for (cc == 1|2).
+define i64 @f2_1_2(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_1_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blhr %r14
+; CHECK-NEXT: .LBB15_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 2
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check 'add' for (cc == 1|2).
+define i64 @f2_1_3(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_1_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlhr %r14
+; CHECK-NEXT: .LBB16_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -3
+ %cond.inv = icmp ult i32 %add, -2
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check 'and' with one operand cc and the other select_ccmask(cc != 1).
+define i64 @f2_2_1(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_2_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB17_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpne0 = icmp ne i32 %andcc, 0
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %cmpne3, %cmpne0
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check 'and' with both operands select_ccmask(cc != 2).
+define i64 @f2_2_2(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_2_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB18_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %ugt1 = icmp samesign ugt i32 %cc, 1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %ugt1, %cmpne3
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 0|2).
+define i64 @f2_2_3(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_2_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB19_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond = icmp eq i32 %and, 0
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 1|3).
+define i64 @f2_2_4(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_2_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB20_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond.inv = icmp eq i32 %and, 0
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check 'icmp' with one operand 'and' and the other 'select_ccmask'(cc != 1).
+define i64 @f2_2_5(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_2_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB21_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond = xor i1 %cmpne3, %trunc
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
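+
+; Sketch of the trunc/xor pattern above: 'trunc i32 %cc to i1' is the low
+; (parity) bit of cc, true for cc in {1, 3}; xor-ing it with (cc != 3)
+; yields true exactly for cc in {0, 2, 3}, i.e. (cc != 1), which again
+; fits a single CC mask.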
+
+; Check nested 'xor' cc with select_ccmask(cc != 1).
+define i64 @f2_3_1(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_3_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB22_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq0 = icmp eq i32 %cc, 0
+ %cmpeq2 = icmp eq i32 %cc, 2
+ %xor = xor i1 %cmpeq0, %cmpeq2
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = xor i1 %cmpne3, %xor
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 1).
+define i64 @f2_3_2(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_3_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB23_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = xor i1 %cmpeq3, %trunc
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 2).
+define i64 @f2_3_3(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB24_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cond.inv = xor i1 %cmpne0, %trunc
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check 'or' with both operands select_ccmask, one with TM and the other
+; with ICMP(cc == 1).
+define i64 @f2_4_1(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB25_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = or i1 %cmpeq3, %cmpeq0
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f2_4_2(i64 %y, i64 %x, ptr %a) {
+; CHECK-LABEL: f2_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB26_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond.inv = icmp samesign ugt i32 %or, -3
+ %res = select i1 %cond.inv, i64 %y, i64 %x
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f2_4_3(i64 %x, i64 %y, ptr %a) {
+; CHECK-LABEL: f2_4_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r4), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB27_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond = icmp samesign ult i32 %or, -2
+ %res = select i1 %cond, i64 %x, i64 %y
+ ret i64 %res
+}
+
+; Test-3(f3_1_*/f3_2_*/f3_3_*/f3_4_*).
+; TrueVal is non-const and FalseVal is const with mixed patterns involving
+; Binary Ops.
+
+; Check 'add' for (cc != 0).
+define i64 @f3_1_1(i64 %x, ptr %a) {
+; CHECK-LABEL: f3_1_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bner %r14
+; CHECK-NEXT: .LBB28_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 3
+ %res = select i1 %cond, i64 %x, i64 5
+ ret i64 %res
+}
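+
+; With a constant on one side of the select (as in this Test-3 group), the
+; backend materializes it with 'lghi'; depending on which operand is the
+; live register, the load lands either after the conditional return (as
+; above) or before it (as in f3_1_3 below).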
+
+; Check 'add' for (cc == 1|2).
+define i64 @f3_1_2(i64 %x, ptr %a) {
+; CHECK-LABEL: f3_1_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blhr %r14
+; CHECK-NEXT: .LBB29_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 2
+ %res = select i1 %cond, i64 %x, i64 5
+ ret i64 %res
+}
+
+; Check 'add' for (cc == 1|2).
+define i64 @f3_1_3(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_1_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bnlhr %r14
+; CHECK-NEXT: .LBB30_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -3
+ %cond.inv = icmp ult i32 %add, -2
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check 'and' with one operand cc and the other select_ccmask(cc != 1).
+define i64 @f3_2_1(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_2_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB31_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpne0 = icmp ne i32 %andcc, 0
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %cmpne3, %cmpne0
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check 'and' with both operands select_ccmask(cc != 2).
+define i64 @f3_2_2(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_2_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB32_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %ugt1 = icmp samesign ugt i32 %cc, 1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %ugt1, %cmpne3
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 0|2).
+define i64 @f3_2_3(i64 %x, ptr %a) {
+; CHECK-LABEL: f3_2_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB33_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond = icmp eq i32 %and, 0
+ %res = select i1 %cond, i64 %x, i64 5
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 1|3).
+define i64 @f3_2_4(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_2_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB34_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond.inv = icmp eq i32 %and, 0
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check 'icmp' with one operand 'and' and the other 'select_ccmask'(cc != 1).
+define i64 @f3_2_5(i64 %x, ptr %a) {
+; CHECK-LABEL: f3_2_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB35_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond = xor i1 %cmpne3, %trunc
+ %res = select i1 %cond, i64 %x, i64 5
+ ret i64 %res
+}
+
+; Check nested 'xor' cc with select_ccmask(cc != 1).
+define i64 @f3_3_1(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_3_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB36_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq0 = icmp eq i32 %cc, 0
+ %cmpeq2 = icmp eq i32 %cc, 2
+ %xor = xor i1 %cmpeq0, %cmpeq2
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = xor i1 %cmpne3, %xor
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 1).
+define i64 @f3_3_2(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_3_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB37_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = xor i1 %cmpeq3, %trunc
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 2).
+define i64 @f3_3_3(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB38_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cond.inv = xor i1 %cmpne0, %trunc
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check 'or' with both operands select_ccmask, one with TM and the other
+; with ICMP(cc == 1).
+define i64 @f3_4_1(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB39_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = or i1 %cmpeq3, %cmpeq0
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f3_4_2(ptr %a, i64 %x) {
+; CHECK-LABEL: f3_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB40_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond.inv = icmp samesign ugt i32 %or, -3
+ %res = select i1 %cond.inv, i64 5, i64 %x
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f3_4_3(i64 %x, ptr %a) {
+; CHECK-LABEL: f3_4_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB41_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond = icmp samesign ult i32 %or, -2
+ %res = select i1 %cond, i64 %x, i64 5
+ ret i64 %res
+}
+
+; Test-4(f4_1_*/f4_2_*/f4_3_*/f4_4_*).
+; TrueVal is const and FalseVal is non-const with mixed patterns involving
+; Binary Ops.
+
+; Check 'add' for (cc != 0).
+define i64 @f4_1_1(ptr %a, i64 %y) {
+; CHECK-LABEL: f4_1_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bner %r14
+; CHECK-NEXT: .LBB42_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 3
+ %res = select i1 %cond, i64 15, i64 %y
+ ret i64 %res
+}
+
+; Check 'add' for (cc == 1|2).
+define i64 @f4_1_2(ptr %a, i64 %y) {
+; CHECK-LABEL: f4_1_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: blhr %r14
+; CHECK-NEXT: .LBB43_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 2
+ %res = select i1 %cond, i64 15, i64 %y
+ ret i64 %res
+}
+
+; Check 'add' for (cc == 1|2).
+define i64 @f4_1_3(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_1_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlhr %r14
+; CHECK-NEXT: .LBB44_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -3
+ %cond.inv = icmp ult i32 %add, -2
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'and' with one operand cc and the other select_ccmask(cc != 1).
+define i64 @f4_2_1(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_2_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB45_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpne0 = icmp ne i32 %andcc, 0
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %cmpne3, %cmpne0
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'and' with both operands select_ccmask(cc != 2).
+define i64 @f4_2_2(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_2_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB46_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %ugt1 = icmp samesign ugt i32 %cc, 1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %ugt1, %cmpne3
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 0|2).
+define i64 @f4_2_3(ptr %a, i64 %y) {
+; CHECK-LABEL: f4_2_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB47_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond = icmp eq i32 %and, 0
+ %res = select i1 %cond, i64 15, i64 %y
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 1|3).
+define i64 @f4_2_4(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_2_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB48_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond.inv = icmp eq i32 %and, 0
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'icmp' with one operand 'and' and the other 'select_ccmask'(cc != 1).
+define i64 @f4_2_5(ptr %a, i64 %y) {
+; CHECK-LABEL: f4_2_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB49_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond = xor i1 %cmpne3, %trunc
+ %res = select i1 %cond, i64 15, i64 %y
+ ret i64 %res
+}
+
+; Check nested 'xor' cc with select_ccmask(cc != 1).
+define i64 @f4_3_1(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_3_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB50_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq0 = icmp eq i32 %cc, 0
+ %cmpeq2 = icmp eq i32 %cc, 2
+ %xor = xor i1 %cmpeq0, %cmpeq2
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = xor i1 %cmpne3, %xor
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 1).
+define i64 @f4_3_2(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_3_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB51_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = xor i1 %cmpeq3, %trunc
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 2).
+define i64 @f4_3_3(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB52_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cond.inv = xor i1 %cmpne0, %trunc
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1).
+define i64 @f4_4_1(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB53_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = or i1 %cmpeq3, %cmpeq0
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f4_4_2(i64 %y, ptr %a) {
+; CHECK-LABEL: f4_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r3), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB54_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond.inv = icmp samesign ugt i32 %or, -3
+ %res = select i1 %cond.inv, i64 %y, i64 15
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f4_4_3(ptr %a, i64 %y) {
+; CHECK-LABEL: f4_4_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB55_1: # %entry
+; CHECK-NEXT: lgr %r2, %r3
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond = icmp samesign ult i32 %or, -2
+ %res = select i1 %cond, i64 15, i64 %y
+ ret i64 %res
+}
+
+; Test-5(f5_1_*/f5_2_*/f5_3_*/f5_4_*).
+; Both TrueVal and FalseVal are constants, with mixed patterns involving
+; binary ops.
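+; A note on the encodings used below: with cc in [0,3], 'add nsw %cc, -1'
+; plus an unsigned compare selects a cc range ((cc - 1) ult 3 is cc != 0,
+; (cc - 1) ult 2 is cc in {1,2}), and 'or disjoint %cc, -4' computes cc - 4
+; (the low two bits of -4 are clear), so 'samesign ugt -3' holds for
+; cc in {2,3} and 'samesign ult -2' for cc in {0,1}.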
+
+
+; Check 'add' for (cc != 0).
+define i64 @f5_1_1(ptr %a) {
+; CHECK-LABEL: f5_1_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bner %r14
+; CHECK-NEXT: .LBB56_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 3
+ %res = select i1 %cond, i64 15, i64 5
+ ret i64 %res
+}
+
+; Check 'add' for (cc == 1|2).
+define i64 @f5_1_2(ptr %a) {
+; CHECK-LABEL: f5_1_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: blhr %r14
+; CHECK-NEXT: .LBB57_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cond = icmp ult i32 %add, 2
+ %res = select i1 %cond, i64 15, i64 5
+ ret i64 %res
+}
+
+; Check 'add' for (cc == 1|2).
+define i64 @f5_1_3(ptr %a) {
+; CHECK-LABEL: f5_1_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bnlhr %r14
+; CHECK-NEXT: .LBB58_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -3
+ %cond.inv = icmp ult i32 %add, -2
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check 'and' with one operand cc and the other select_ccmask(cc != 1).
+define i64 @f5_2_1(ptr %a) {
+; CHECK-LABEL: f5_2_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB59_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpne0 = icmp ne i32 %andcc, 0
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %cmpne3, %cmpne0
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check 'and' with both operands select_ccmask(cc != 2).
+define i64 @f5_2_2(ptr %a) {
+; CHECK-LABEL: f5_2_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB60_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %ugt1 = icmp samesign ugt i32 %cc, 1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = and i1 %ugt1, %cmpne3
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 0|2).
+define i64 @f5_2_3(ptr %a) {
+; CHECK-LABEL: f5_2_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB61_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond = icmp eq i32 %and, 0
+ %res = select i1 %cond, i64 15, i64 5
+ ret i64 %res
+}
+
+; Check 'and/tm' for (cc == 1|3).
+define i64 @f5_2_4(ptr %a) {
+; CHECK-LABEL: f5_2_4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB62_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %and = and i32 %cc, 1
+ %cond.inv = icmp eq i32 %and, 0
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check 'icmp' with one operand 'and' and the other 'select_ccmask'(cc != 1).
+define i64 @f5_2_5(ptr %a) {
+; CHECK-LABEL: f5_2_5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB63_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond = xor i1 %cmpne3, %trunc
+ %res = select i1 %cond, i64 15, i64 5
+ ret i64 %res
+}
+
+
+; Check nested 'xor' of cc with select_ccmask(cc != 1).
+define i64 @f5_3_1(ptr %a) {
+; CHECK-LABEL: f5_3_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB64_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq0 = icmp eq i32 %cc, 0
+ %cmpeq2 = icmp eq i32 %cc, 2
+ %xor = xor i1 %cmpeq0, %cmpeq2
+ %cmpne3 = icmp ne i32 %cc, 3
+ %cond.inv = xor i1 %cmpne3, %xor
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 1).
+define i64 @f5_3_2(ptr %a) {
+; CHECK-LABEL: f5_3_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: blr %r14
+; CHECK-NEXT: .LBB65_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = xor i1 %cmpeq3, %trunc
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check branching on 'tm' and 'xor' with one operand cc and the other
+; select_ccmask(cc != 2).
+define i64 @f5_3_3(ptr %a) {
+; CHECK-LABEL: f5_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bhr %r14
+; CHECK-NEXT: .LBB66_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cond.inv = xor i1 %cmpne0, %trunc
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1).
+define i64 @f5_4_1(ptr %a) {
+; CHECK-LABEL: f5_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bnlr %r14
+; CHECK-NEXT: .LBB67_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cond.inv = or i1 %cmpeq3, %cmpeq0
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f5_4_2(ptr %a) {
+; CHECK-LABEL: f5_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: bnler %r14
+; CHECK-NEXT: .LBB68_1: # %entry
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond.inv = icmp samesign ugt i32 %or, -3
+ %res = select i1 %cond.inv, i64 5, i64 15
+ ret i64 %res
+}
+
+; Check 'or' for (cc == 0|1).
+define i64 @f5_4_3(ptr %a) {
+; CHECK-LABEL: f5_4_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bler %r14
+; CHECK-NEXT: .LBB69_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cond = icmp samesign ult i32 %or, -2
+ %res = select i1 %cond, i64 15, i64 5
+ ret i64 %res
+}
+
+; Nested select_ccmask with TrueVal and FalseVal swapped.
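+; Here %res is 15 exactly when (cc & 1) == 0 (an even cc can never equal 3)
+; and 5 otherwise, i.e. the nested selects reduce to a single test for
+; cc in {0,2}.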
+define i64 @f6_1(ptr %a) {
+; CHECK-LABEL: f6_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: lghi %r2, 15
+; CHECK-NEXT: bher %r14
+; CHECK-NEXT: .LBB70_1: # %entry
+; CHECK-NEXT: lghi %r2, 5
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %cmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %cmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %select = select i1 %cmpeq3, i64 5, i64 15
+ %res = select i1 %cmpeq0, i64 %select, i64 5
+ ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index 28b4541..7bdc4e1 100644
--- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: callq __ubyte_convert_to_ctype
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: js LBB0_6
+; CHECK-NEXT: js LBB0_4
; CHECK-NEXT: ## %bb.1: ## %cond_next.i
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: movq %rbx, %rdi
@@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: sarl $31, %ecx
; CHECK-NEXT: andl %eax, %ecx
; CHECK-NEXT: cmpl $-2, %ecx
-; CHECK-NEXT: je LBB0_10
+; CHECK-NEXT: je LBB0_8
; CHECK-NEXT: ## %bb.2: ## %cond_next.i
; CHECK-NEXT: cmpl $-1, %ecx
-; CHECK-NEXT: jne LBB0_3
-; CHECK-NEXT: LBB0_8: ## %bb4
+; CHECK-NEXT: jne LBB0_6
+; CHECK-NEXT: LBB0_3: ## %bb4
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 16(%rax), %rax
-; CHECK-NEXT: jmp LBB0_9
-; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit
+; CHECK-NEXT: jmp LBB0_10
+; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: cmpl $-2, %eax
-; CHECK-NEXT: je LBB0_10
-; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit
-; CHECK-NEXT: cmpl $-1, %eax
; CHECK-NEXT: je LBB0_8
-; CHECK-NEXT: LBB0_3: ## %bb35
+; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit
+; CHECK-NEXT: cmpl $-1, %eax
+; CHECK-NEXT: je LBB0_3
+; CHECK-NEXT: LBB0_6: ## %bb35
; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *216(%rax)
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: je LBB0_4
-; CHECK-NEXT: ## %bb.12: ## %cond_false.i
-; CHECK-NEXT: setne %dil
+; CHECK-NEXT: je LBB0_11
+; CHECK-NEXT: ## %bb.7: ## %cond_false.i
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: movzbl %sil, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movl %eax, %r15d
; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: testb %dil, %al
-; CHECK-NEXT: jne LBB0_5
-; CHECK-NEXT: LBB0_13: ## %cond_true.i200
-; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: jne LBB0_15
-; CHECK-NEXT: ## %bb.14: ## %cond_true14.i
-; CHECK-NEXT: movl $4, %edi
-; CHECK-NEXT: callq _feraiseexcept
-; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit
-; CHECK-NEXT: xorl %ebx, %ebx
-; CHECK-NEXT: jmp LBB0_16
-; CHECK-NEXT: LBB0_10: ## %bb17
+; CHECK-NEXT: jne LBB0_12
+; CHECK-NEXT: jmp LBB0_14
+; CHECK-NEXT: LBB0_8: ## %bb17
; CHECK-NEXT: callq _PyErr_Occurred
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jne LBB0_23
-; CHECK-NEXT: ## %bb.11: ## %cond_next
+; CHECK-NEXT: jne LBB0_27
+; CHECK-NEXT: ## %bb.9: ## %cond_next
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 80(%rax), %rax
-; CHECK-NEXT: LBB0_9: ## %bb4
+; CHECK-NEXT: LBB0_10: ## %bb4
; CHECK-NEXT: movq 96(%rax), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: callq *40(%rax)
-; CHECK-NEXT: jmp LBB0_24
-; CHECK-NEXT: LBB0_4: ## %cond_true.i
+; CHECK-NEXT: jmp LBB0_28
+; CHECK-NEXT: LBB0_11: ## %cond_true.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: testb %sil, %sil
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: je LBB0_14
+; CHECK-NEXT: LBB0_12: ## %cond_false.i
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: xorl %r15d, %r15d
-; CHECK-NEXT: orb %al, %cl
-; CHECK-NEXT: jne LBB0_13
-; CHECK-NEXT: LBB0_5: ## %cond_next17.i
+; CHECK-NEXT: je LBB0_14
+; CHECK-NEXT: ## %bb.13: ## %cond_next17.i
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movzbl %ah, %ebx
-; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit
+; CHECK-NEXT: jmp LBB0_18
+; CHECK-NEXT: LBB0_14: ## %cond_true.i200
+; CHECK-NEXT: testb %dl, %dl
+; CHECK-NEXT: jne LBB0_17
+; CHECK-NEXT: ## %bb.16: ## %cond_true14.i
+; CHECK-NEXT: movl $4, %edi
+; CHECK-NEXT: callq _feraiseexcept
+; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit
+; CHECK-NEXT: xorl %ebx, %ebx
+; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *224(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je LBB0_19
-; CHECK-NEXT: ## %bb.17: ## %cond_true61
+; CHECK-NEXT: je LBB0_21
+; CHECK-NEXT: ## %bb.19: ## %cond_true61
; CHECK-NEXT: movl %eax, %ebp
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi
@@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: callq *200(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: js LBB0_23
-; CHECK-NEXT: ## %bb.18: ## %cond_next73
+; CHECK-NEXT: js LBB0_27
+; CHECK-NEXT: ## %bb.20: ## %cond_next73
; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi
@@ -149,13 +146,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: callq *232(%rax)
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: jne LBB0_23
-; CHECK-NEXT: LBB0_19: ## %cond_next89
+; CHECK-NEXT: jne LBB0_27
+; CHECK-NEXT: LBB0_21: ## %cond_next89
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq _PyTuple_New
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_23
-; CHECK-NEXT: ## %bb.20: ## %cond_next97
+; CHECK-NEXT: je LBB0_27
+; CHECK-NEXT: ## %bb.22: ## %cond_next97
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12
; CHECK-NEXT: movq (%r12), %rax
@@ -163,8 +160,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_21
-; CHECK-NEXT: ## %bb.25: ## %cond_next135
+; CHECK-NEXT: je LBB0_25
+; CHECK-NEXT: ## %bb.23: ## %cond_next135
; CHECK-NEXT: movb %r15b, 16(%rax)
; CHECK-NEXT: movq %rax, 24(%r14)
; CHECK-NEXT: movq (%r12), %rax
@@ -172,22 +169,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: je LBB0_21
-; CHECK-NEXT: ## %bb.26: ## %cond_next182
+; CHECK-NEXT: je LBB0_25
+; CHECK-NEXT: ## %bb.24: ## %cond_next182
; CHECK-NEXT: movb %bl, 16(%rax)
; CHECK-NEXT: movq %rax, 32(%r14)
; CHECK-NEXT: movq %r14, %rax
-; CHECK-NEXT: jmp LBB0_24
-; CHECK-NEXT: LBB0_21: ## %cond_true113
+; CHECK-NEXT: jmp LBB0_28
+; CHECK-NEXT: LBB0_25: ## %cond_true113
; CHECK-NEXT: decq (%r14)
-; CHECK-NEXT: jne LBB0_23
-; CHECK-NEXT: ## %bb.22: ## %cond_true126
+; CHECK-NEXT: jne LBB0_27
+; CHECK-NEXT: ## %bb.26: ## %cond_true126
; CHECK-NEXT: movq 8(%r14), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: callq *48(%rax)
-; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock
+; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock
+; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll
index df04b67..c2b7068 100644
--- a/llvm/test/CodeGen/X86/isel-fpclass.ll
+++ b/llvm/test/CodeGen/X86/isel-fpclass.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86,X86-SDAGISEL
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL
; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL
; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL
-; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X64,X64-GISEL
+; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X86,X86-GISEL
+; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X64-GISEL
define i1 @isnone_f(float %x) nounwind {
; X86-LABEL: isnone_f:
@@ -23,6 +23,11 @@ define i1 @isnone_f(float %x) nounwind {
; X86-FASTISEL-NEXT: fstp %st(0)
; X86-FASTISEL-NEXT: xorl %eax, %eax
; X86-FASTISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isnone_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %eax, %eax
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0)
ret i1 %0
@@ -45,22 +50,27 @@ define i1 @isany_f(float %x) nounwind {
; X86-FASTISEL-NEXT: fstp %st(0)
; X86-FASTISEL-NEXT: movb $1, %al
; X86-FASTISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isany_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movb $1, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023)
ret i1 %0
}
define i1 @issignaling_f(float %x) nounwind {
-; X86-LABEL: issignaling_f:
-; X86: # %bb.0:
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-NEXT: setl %cl
-; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
-; X86-NEXT: setge %al
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: issignaling_f:
+; X86-SDAGISEL: # %bb.0:
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %cl
+; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: andb %cl, %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: issignaling_f:
; X64: # %bb.0:
@@ -87,18 +97,44 @@ define i1 @issignaling_f(float %x) nounwind {
; X86-FASTISEL-NEXT: andb %cl, %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: issignaling_f:
+; X86-GISEL: # %bb.0:
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %dl
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setb %al
+; X86-GISEL-NEXT: andb %dl, %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: issignaling_f:
+; X64-GISEL: # %bb.0:
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %dl
+; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-GISEL-NEXT: setb %al
+; X64-GISEL-NEXT: andb %dl, %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
%a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
ret i1 %a0
}
define i1 @isquiet_f(float %x) nounwind {
-; X86-LABEL: isquiet_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-NEXT: setge %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: isquiet_f:
; X64: # %bb.0: # %entry
@@ -119,19 +155,39 @@ define i1 @issignaling_f(float %x) nounwind {
; X86-FASTISEL-NEXT: setge %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setae %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isquiet_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-GISEL-NEXT: setae %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
ret i1 %0
}
define i1 @not_isquiet_f(float %x) nounwind {
-; X86-LABEL: not_isquiet_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
-; X86-NEXT: setl %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: not_isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: not_isquiet_f:
; X64: # %bb.0: # %entry
@@ -152,19 +208,57 @@ define i1 @not_isquiet_f(float %x) nounwind {
; X86-FASTISEL-NEXT: setl %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: not_isquiet_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %dl
+; X86-GISEL-NEXT: orb %cl, %dl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %cl
+; X86-GISEL-NEXT: orb %dl, %cl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %dl
+; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-GISEL-NEXT: setb %al
+; X86-GISEL-NEXT: andb %dl, %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: not_isquiet_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %dl
+; X64-GISEL-NEXT: orb %cl, %dl
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %cl
+; X64-GISEL-NEXT: orb %dl, %cl
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %dl
+; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-GISEL-NEXT: setb %al
+; X64-GISEL-NEXT: andb %dl, %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
ret i1 %0
}
define i1 @isinf_f(float %x) nounwind {
-; X86-LABEL: isinf_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: sete %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: isinf_f:
; X64: # %bb.0: # %entry
@@ -185,19 +279,39 @@ define i1 @isinf_f(float %x) nounwind {
; X86-FASTISEL-NEXT: sete %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isinf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
ret i1 %0
}
define i1 @not_isinf_f(float %x) nounwind {
-; X86-LABEL: not_isinf_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setne %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: not_isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: not_isinf_f:
; X64: # %bb.0: # %entry
@@ -218,17 +332,43 @@ define i1 @not_isinf_f(float %x) nounwind {
; X86-FASTISEL-NEXT: setne %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: not_isinf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %dl
+; X86-GISEL-NEXT: orb %cl, %dl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %al
+; X86-GISEL-NEXT: orb %dl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: not_isinf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %dl
+; X64-GISEL-NEXT: orb %cl, %dl
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %al
+; X64-GISEL-NEXT: orb %dl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
ret i1 %0
}
define i1 @is_plus_inf_f(float %x) nounwind {
-; X86-LABEL: is_plus_inf_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
-; X86-NEXT: sete %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: is_plus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: is_plus_inf_f:
; X64: # %bb.0: # %entry
@@ -246,17 +386,34 @@ define i1 @is_plus_inf_f(float %x) nounwind {
; X86-FASTISEL-NEXT: sete %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: is_plus_inf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: is_plus_inf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
ret i1 %0
}
define i1 @is_minus_inf_f(float %x) nounwind {
-; X86-LABEL: is_minus_inf_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-NEXT: sete %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: is_minus_inf_f:
; X64: # %bb.0: # %entry
@@ -274,17 +431,34 @@ define i1 @is_minus_inf_f(float %x) nounwind {
; X86-FASTISEL-NEXT: sete %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: is_minus_inf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-GISEL-NEXT: sete %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: is_minus_inf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-GISEL-NEXT: sete %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
ret i1 %0
}
define i1 @not_is_minus_inf_f(float %x) nounwind {
-; X86-LABEL: not_is_minus_inf_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
-; X86-NEXT: setne %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: not_is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: not_is_minus_inf_f:
; X64: # %bb.0: # %entry
@@ -302,19 +476,55 @@ define i1 @not_is_minus_inf_f(float %x) nounwind {
; X86-FASTISEL-NEXT: setne %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: not_is_minus_inf_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: pushl %ebx
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: movl %eax, %ecx
+; X86-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %edx, %edx
+; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %bl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %ah
+; X86-GISEL-NEXT: orb %dl, %ah
+; X86-GISEL-NEXT: orb %bl, %ah
+; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %al
+; X86-GISEL-NEXT: orb %ah, %al
+; X86-GISEL-NEXT: popl %ebx
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: not_is_minus_inf_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: movl %eax, %ecx
+; X64-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %edx, %edx
+; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %sil
+; X64-GISEL-NEXT: orb %dl, %sil
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %dl
+; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %al
+; X64-GISEL-NEXT: orb %dl, %al
+; X64-GISEL-NEXT: orb %sil, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
ret i1 %0
}
define i1 @isfinite_f(float %x) nounwind {
-; X86-LABEL: isfinite_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: isfinite_f:
; X64: # %bb.0: # %entry
@@ -335,19 +545,39 @@ define i1 @isfinite_f(float %x) nounwind {
; X86-FASTISEL-NEXT: setl %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: isfinite_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isfinite_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
ret i1 %0
}
define i1 @not_isfinite_f(float %x) nounwind {
-; X86-LABEL: not_isfinite_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setge %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: not_isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: not_isfinite_f:
; X64: # %bb.0: # %entry
@@ -368,17 +598,43 @@ define i1 @not_isfinite_f(float %x) nounwind {
; X86-FASTISEL-NEXT: setge %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: not_isfinite_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: sete %dl
+; X86-GISEL-NEXT: orb %cl, %dl
+; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-GISEL-NEXT: seta %al
+; X86-GISEL-NEXT: orb %dl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: not_isfinite_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: sete %dl
+; X64-GISEL-NEXT: orb %cl, %dl
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: seta %al
+; X64-GISEL-NEXT: orb %dl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
ret i1 %0
}
define i1 @is_plus_finite_f(float %x) nounwind {
-; X86-LABEL: is_plus_finite_f:
-; X86: # %bb.0: # %entry
-; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
-; X86-NEXT: setb %al
-; X86-NEXT: retl
+; X86-SDAGISEL-LABEL: is_plus_finite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setb %al
+; X86-SDAGISEL-NEXT: retl
;
; X64-LABEL: is_plus_finite_f:
; X64: # %bb.0: # %entry
@@ -396,6 +652,23 @@ define i1 @is_plus_finite_f(float %x) nounwind {
; X86-FASTISEL-NEXT: setb %al
; X86-FASTISEL-NEXT: popl %ecx
; X86-FASTISEL-NEXT: retl
+;
+; X86-GISEL-LABEL: is_plus_finite_f:
+; X86-GISEL: # %bb.0: # %entry
+; X86-GISEL-NEXT: xorl %ecx, %ecx
+; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-GISEL-NEXT: setb %al
+; X86-GISEL-NEXT: orb %cl, %al
+; X86-GISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: is_plus_finite_f:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %ecx, %ecx
+; X64-GISEL-NEXT: movd %xmm0, %eax
+; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-GISEL-NEXT: setb %al
+; X64-GISEL-NEXT: orb %cl, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
ret i1 %0
@@ -418,6 +691,11 @@ define i1 @isnone_d(double %x) nounwind {
; X86-FASTISEL-NEXT: fstp %st(0)
; X86-FASTISEL-NEXT: xorl %eax, %eax
; X86-FASTISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isnone_d:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: xorl %eax, %eax
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0)
ret i1 %0
@@ -440,6 +718,11 @@ define i1 @isany_d(double %x) nounwind {
; X86-FASTISEL-NEXT: fstp %st(0)
; X86-FASTISEL-NEXT: movb $1, %al
; X86-FASTISEL-NEXT: retl
+;
+; X64-GISEL-LABEL: isany_d:
+; X64-GISEL: # %bb.0: # %entry
+; X64-GISEL-NEXT: movb $1, %al
+; X64-GISEL-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023)
ret i1 %0
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 4cde581..cdf6bdd 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4765,6 +4765,33 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32
}
declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>)
+define <16 x i32> @pr163023(ptr %a0, <16 x i32> %a1) {
+; X64-LABEL: pr163023:
+; X64: # %bb.0:
+; X64-NEXT: kxnorw %k0, %k0, %k1
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0
+; X64-NEXT: retq
+;
+; X86-LABEL: pr163023:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kxnorw %k0, %k0, %k1
+; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1}
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0
+; X86-NEXT: retl
+ %addr.p = ptrtoint ptr %a0 to i64
+ %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0
+ %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer
+ %ofs = sext <16 x i32> %a1 to <16 x i64>
+ %addr = add nuw <16 x i64> %addr.splat, %ofs
+ %ptr = inttoptr <16 x i64> %addr to <16 x ptr>
+ %gather = tail call fastcc <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison)
+ ret <16 x i32> %gather
+}
+
;
; PR45906
; This used to cause fast-isel to generate bad copy instructions that would
diff --git a/llvm/test/CodeGen/X86/pr160612.ll b/llvm/test/CodeGen/X86/pr160612.ll
new file mode 100644
index 0000000..6572c42
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr160612.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s
+
+; Test for issue #160612: OR conditions in branches should use multiple branches
+; instead of materializing booleans with SETCC when no special optimizations apply.
+
+declare void @subroutine_foo()
+declare void @subroutine_bar()
+
+; Original issue: (x == 0 || y == 0) was generating SETCC + TEST + BRANCH
+; instead of using two conditional branches directly.
+define void @func_a(i32 noundef %x, i32 noundef %y) {
+; CHECK-LABEL: func_a:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: jne subroutine_bar@PLT # TAILCALL
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: jmp subroutine_foo@PLT # TAILCALL
+entry:
+ %cmp = icmp eq i32 %x, 0
+ %cmp1 = icmp eq i32 %y, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.else
+
+if.then:
+ tail call void @subroutine_foo()
+ br label %if.end
+
+if.else:
+ tail call void @subroutine_bar()
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+; Reference implementation that already generated optimal code.
+; This should continue to generate the same optimal code.
+define void @func_b(i32 noundef %x, i32 noundef %y) {
+; CHECK-LABEL: func_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
+; CHECK-NEXT: # %bb.1: # %if.else
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
+; CHECK-NEXT: # %bb.2: # %if.else3
+; CHECK-NEXT: jmp subroutine_bar@PLT # TAILCALL
+entry:
+ %cmp = icmp eq i32 %x, 0
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ tail call void @subroutine_foo()
+ br label %if.end4
+
+if.else:
+ %cmp1 = icmp eq i32 %y, 0
+ br i1 %cmp1, label %if.then2, label %if.else3
+
+if.then2:
+ tail call void @subroutine_foo()
+ br label %if.end4
+
+if.else3:
+ tail call void @subroutine_bar()
+ br label %if.end4
+
+if.end4:
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll
index 69abf6e..d018c53 100644
--- a/llvm/test/CodeGen/X86/setcc-wide-types.ll
+++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll
@@ -1493,15 +1493,23 @@ define i1 @allbits_i128_load_arg(ptr %w) {
}
define i1 @anybits_i256_load_arg(ptr %w) {
-; ANY-LABEL: anybits_i256_load_arg:
-; ANY: # %bb.0:
-; ANY-NEXT: movq (%rdi), %rax
-; ANY-NEXT: movq 8(%rdi), %rcx
-; ANY-NEXT: orq 24(%rdi), %rcx
-; ANY-NEXT: orq 16(%rdi), %rax
-; ANY-NEXT: orq %rcx, %rax
-; ANY-NEXT: setne %al
-; ANY-NEXT: retq
+; SSE-LABEL: anybits_i256_load_arg:
+; SSE: # %bb.0:
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rcx
+; SSE-NEXT: orq 24(%rdi), %rcx
+; SSE-NEXT: orq 16(%rdi), %rax
+; SSE-NEXT: orq %rcx, %rax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
+;
+; AVXANY-LABEL: anybits_i256_load_arg:
+; AVXANY: # %bb.0:
+; AVXANY-NEXT: vmovdqu (%rdi), %ymm0
+; AVXANY-NEXT: vptest %ymm0, %ymm0
+; AVXANY-NEXT: setne %al
+; AVXANY-NEXT: vzeroupper
+; AVXANY-NEXT: retq
%ld = load i256, ptr %w
%cmp = icmp ne i256 %ld, 0
ret i1 %cmp
@@ -1552,21 +1560,30 @@ define i1 @allbits_i256_load_arg(ptr %w) {
}
define i1 @anybits_i512_load_arg(ptr %w) {
-; ANY-LABEL: anybits_i512_load_arg:
-; ANY: # %bb.0:
-; ANY-NEXT: movq 16(%rdi), %rax
-; ANY-NEXT: movq (%rdi), %rcx
-; ANY-NEXT: movq 8(%rdi), %rdx
-; ANY-NEXT: movq 24(%rdi), %rsi
-; ANY-NEXT: orq 56(%rdi), %rsi
-; ANY-NEXT: orq 40(%rdi), %rdx
-; ANY-NEXT: orq %rsi, %rdx
-; ANY-NEXT: orq 48(%rdi), %rax
-; ANY-NEXT: orq 32(%rdi), %rcx
-; ANY-NEXT: orq %rax, %rcx
-; ANY-NEXT: orq %rdx, %rcx
-; ANY-NEXT: setne %al
-; ANY-NEXT: retq
+; NO512-LABEL: anybits_i512_load_arg:
+; NO512: # %bb.0:
+; NO512-NEXT: movq 16(%rdi), %rax
+; NO512-NEXT: movq (%rdi), %rcx
+; NO512-NEXT: movq 8(%rdi), %rdx
+; NO512-NEXT: movq 24(%rdi), %rsi
+; NO512-NEXT: orq 56(%rdi), %rsi
+; NO512-NEXT: orq 40(%rdi), %rdx
+; NO512-NEXT: orq %rsi, %rdx
+; NO512-NEXT: orq 48(%rdi), %rax
+; NO512-NEXT: orq 32(%rdi), %rcx
+; NO512-NEXT: orq %rax, %rcx
+; NO512-NEXT: orq %rdx, %rcx
+; NO512-NEXT: setne %al
+; NO512-NEXT: retq
+;
+; AVX512-LABEL: anybits_i512_load_arg:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%ld = load i512, ptr %w
%cmp = icmp ne i512 %ld, 0
ret i1 %cmp
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
index b2064b1..02d4d88 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
@@ -181,40 +181,38 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key
; CHECK-LABEL: segmentedStack:
; CHECK: ## %bb.0:
; CHECK-NEXT: cmpq %gs:816, %rsp
-; CHECK-NEXT: jbe LBB3_6
+; CHECK-NEXT: jbe LBB3_7
; CHECK-NEXT: LBB3_1: ## %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: orb %al, %cl
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: sete %al
-; CHECK-NEXT: testb %cl, %cl
-; CHECK-NEXT: jne LBB3_4
-; CHECK-NEXT: ## %bb.2: ## %if.end4.i
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je LBB3_5
+; CHECK-NEXT: ## %bb.2: ## %entry
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je LBB3_5
+; CHECK-NEXT: ## %bb.3: ## %if.end4.i
; CHECK-NEXT: movq 8(%rdi), %rdx
; CHECK-NEXT: cmpq 8(%rsi), %rdx
-; CHECK-NEXT: jne LBB3_5
-; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i
+; CHECK-NEXT: jne LBB3_6
+; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i
; CHECK-NEXT: movq (%rsi), %rsi
; CHECK-NEXT: movq (%rdi), %rdi
; CHECK-NEXT: callq _memcmp
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: sete %al
-; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit
+; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB3_5:
+; CHECK-NEXT: LBB3_6:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
-; CHECK-NEXT: LBB3_6:
+; CHECK-NEXT: LBB3_7:
; CHECK-NEXT: movl $8, %r10d
; CHECK-NEXT: movl $0, %r11d
; CHECK-NEXT: callq ___morestack
@@ -224,43 +222,41 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key
; NOCOMPACTUNWIND-LABEL: segmentedStack:
; NOCOMPACTUNWIND: # %bb.0:
; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp
-; NOCOMPACTUNWIND-NEXT: jbe .LBB3_6
+; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7
; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry
; NOCOMPACTUNWIND-NEXT: pushq %rax
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16
-; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi
-; NOCOMPACTUNWIND-NEXT: sete %al
-; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi
-; NOCOMPACTUNWIND-NEXT: sete %cl
-; NOCOMPACTUNWIND-NEXT: orb %al, %cl
; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax
; NOCOMPACTUNWIND-NEXT: orq %rsi, %rax
; NOCOMPACTUNWIND-NEXT: sete %al
-; NOCOMPACTUNWIND-NEXT: testb %cl, %cl
-; NOCOMPACTUNWIND-NEXT: jne .LBB3_4
-; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i
+; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi
+; NOCOMPACTUNWIND-NEXT: je .LBB3_5
+; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry
+; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi
+; NOCOMPACTUNWIND-NEXT: je .LBB3_5
+; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i
; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx
; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx
-; NOCOMPACTUNWIND-NEXT: jne .LBB3_5
-; NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i
+; NOCOMPACTUNWIND-NEXT: jne .LBB3_6
+; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i
; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi
; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi
; NOCOMPACTUNWIND-NEXT: callq memcmp@PLT
; NOCOMPACTUNWIND-NEXT: testl %eax, %eax
; NOCOMPACTUNWIND-NEXT: sete %al
-; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit
+; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit
; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax
; NOCOMPACTUNWIND-NEXT: popq %rcx
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8
; NOCOMPACTUNWIND-NEXT: retq
-; NOCOMPACTUNWIND-NEXT: .LBB3_5:
+; NOCOMPACTUNWIND-NEXT: .LBB3_6:
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16
; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax
; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax
; NOCOMPACTUNWIND-NEXT: popq %rcx
; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8
; NOCOMPACTUNWIND-NEXT: retq
-; NOCOMPACTUNWIND-NEXT: .LBB3_6:
+; NOCOMPACTUNWIND-NEXT: .LBB3_7:
; NOCOMPACTUNWIND-NEXT: movl $8, %r10d
; NOCOMPACTUNWIND-NEXT: movl $0, %r11d
; NOCOMPACTUNWIND-NEXT: callq __morestack
diff --git a/llvm/test/MC/AArch64/data-directive-specifier.s b/llvm/test/MC/AArch64/data-directive-specifier.s
index 2cb7eb3..2d1ec4f 100644
--- a/llvm/test/MC/AArch64/data-directive-specifier.s
+++ b/llvm/test/MC/AArch64/data-directive-specifier.s
@@ -12,6 +12,7 @@ l:
# CHECK-NEXT: 0x8 R_AARCH64_PLT32 extern 0x4
# CHECK-NEXT: 0xC R_AARCH64_PLT32 g 0x8
# CHECK-NEXT: 0x10 R_AARCH64_PLT32 g 0x18
+# CHECK-NEXT: 0x14 R_AARCH64_FUNCINIT64 .text 0x0
# CHECK-NEXT: }
.data
.word l@plt - .
@@ -21,6 +22,8 @@ l:
.word g@plt - . + 8
.word g@plt - .data + 8
+.quad l@funcinit
+
# CHECK: Section ({{.*}}) .rela.data1 {
# CHECK-NEXT: 0x0 R_AARCH64_GOTPCREL32 data1 0x0
# CHECK-NEXT: 0x4 R_AARCH64_GOTPCREL32 extern 0x4
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll
index fdab67a..afc98ce 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg %s | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg -verify-dom-info=1 %s | FileCheck %s
; This test checks that a constant propagation is applied for a basic loop.
; Related to bug 44679.
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
index f45798b..5076517 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
-; RUN: opt -S -passes=dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s
-; RUN: opt -S -passes=dfa-jump-threading -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE
+; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE
; This test checks that the analysis identifies all threadable paths in a
; simple CFG. A threadable path includes a list of basic blocks, the exit
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
index 092c854..426b51e 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
-; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s
; These tests check that the DFA jump threading transformation is applied
; properly to two CFGs. It checks that blocks are cloned, branches are updated,
@@ -445,9 +445,67 @@ bb2: ; preds = %select.unfold
unreachable
}
+
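+; Reduced case exercising DominatorTree updates during DFA jump threading;
+; the -verify-dom-info=1 flag added to the RUN line above re-verifies the
+; dominator tree after the pass runs.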
+define i16 @DTU_update_crash() {
+; CHECK-LABEL: @DTU_update_crash(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY_SELECTBLOCK:%.*]]
+; CHECK: for.body.selectblock:
+; CHECK-NEXT: br i1 false, label [[SWITCHBLOCK_JT0:%.*]], label [[SEL_SI_UNFOLD_FALSE_JT0:%.*]]
+; CHECK: sel.si.unfold.false:
+; CHECK-NEXT: br label [[SWITCHBLOCK:%.*]]
+; CHECK: sel.si.unfold.false.jt0:
+; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ]
+; CHECK-NEXT: br label [[SWITCHBLOCK_JT0]]
+; CHECK: switchblock:
+; CHECK-NEXT: [[SWITCHBLOCK_PHI:%.*]] = phi i32 [ poison, [[SEL_SI_UNFOLD_FALSE:%.*]] ]
+; CHECK-NEXT: [[P_24_ADDR_3:%.*]] = phi i32 [ 0, [[SEL_SI_UNFOLD_FALSE]] ]
+; CHECK-NEXT: switch i32 [[SWITCHBLOCK_PHI]], label [[CLEANUP:%.*]] [
+; CHECK-NEXT: i32 0, label [[FOR_INC:%.*]]
+; CHECK-NEXT: i32 1, label [[CLEANUP]]
+; CHECK-NEXT: i32 5, label [[FOR_BODY_SELECTBLOCK]]
+; CHECK-NEXT: ]
+; CHECK: switchblock.jt0:
+; CHECK-NEXT: [[SWITCHBLOCK_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SEL_SI_UNFOLD_FALSE_JT0]] ]
+; CHECK-NEXT: [[P_24_ADDR_3_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ 0, [[SEL_SI_UNFOLD_FALSE_JT0]] ]
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: br i1 false, label [[FOR_BODY_SELECTBLOCK]], label [[CLEANUP]]
+; CHECK: cleanup:
+; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[P_24_ADDR_3_JT0]])
+; CHECK-NEXT: ret i16 0
+;
+entry:
+ br label %for.body.selectblock
+
+for.body.selectblock: ; preds = %for.inc, %switchblock, %entry
+ %sel = select i1 false, i32 0, i32 0
+ br label %switchblock
+
+switchblock: ; preds = %for.body.selectblock
+ %switchblock.phi = phi i32 [ %sel, %for.body.selectblock ]
+ %p_24.addr.3 = phi i32 [ 0, %for.body.selectblock ]
+ switch i32 %switchblock.phi, label %cleanup [
+ i32 0, label %for.inc
+ i32 1, label %cleanup
+ i32 5, label %for.body.selectblock
+ ]
+
+for.inc: ; preds = %switchblock
+ br i1 false, label %for.body.selectblock, label %cleanup
+
+cleanup: ; preds = %for.inc, %switchblock, %switchblock
+ call void (...) @llvm.fake.use(i32 %p_24.addr.3)
+ ret i16 0
+}
+
+declare void @llvm.fake.use(...)
+
!0 = !{!"function_entry_count", i32 10}
!1 = !{!"branch_weights", i32 3, i32 5}
;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+;.
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
;.
diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
index de38752..95d3ffa 100644
--- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false %s | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false -verify-dom-info=1 %s | FileCheck %s
; These tests check if selects are unfolded properly for jump threading
; opportunities. There are three different patterns to consider:
diff --git a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll
index 4555dfb..71a469d 100644
--- a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s
declare void @do_something()
declare void @user(i32)
diff --git a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll
index 00500a7..cc117e7 100644
--- a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll
+++ b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s
+; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s
define void @pr60254() {
; CHECK-LABEL: define void @pr60254() {
diff --git a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll
index 89b132e..9371fe2 100644
--- a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll
@@ -18,7 +18,7 @@ define i64 @test_ptr_compare_guard(ptr %start, ptr %end) {
; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[EXIT_LOOPEXIT:.*]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1
-; CHECK-NEXT: [[I64_IV_NEXT]] = add i64 [[I64_IV]], 1
+; CHECK-NEXT: [[I64_IV_NEXT]] = add nuw i64 [[I64_IV]], 1
; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]]
; CHECK: [[EXIT_LOOPEXIT]]:
diff --git a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll
index ba34930..bc988a9 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
define i1 @a_true_implies_b_true(i8 %z, i1 %X, i1 %Y) {
@@ -34,15 +34,15 @@ define <2 x i1> @a_true_implies_b_true_vec(i8 %z0, <2 x i1> %X, <2 x i1> %Y) {
ret <2 x i1> %res
}
-define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) {
+define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) !prof !0 {
; CHECK-LABEL: @a_true_implies_b_true2(
; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 [[Z:%.*]], 20
-; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i1 [[RES]]
;
%a = icmp ugt i8 %z, 20
%b = icmp ugt i8 %z, 10
- %sel = select i1 %b, i1 %X, i1 %Y
+ %sel = select i1 %b, i1 %X, i1 %Y, !prof !1
%res = and i1 %a, %sel
ret i1 %res
}
@@ -258,3 +258,10 @@ define i1 @neg_icmp_eq_implies_trunc(i8 %x, i1 %c) {
%sel2 = select i1 %cmp, i1 true, i1 %sel1
ret i1 %sel2
}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 2, i32 3}
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3}
+;.
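As a quick sanity check on the preserved metadata (a sketch of the usual branch_weights reading, not something this test asserts): !1 = !{!"branch_weights", i32 2, i32 3} gives the select's true arm a weight of 2 out of 2 + 3 = 5, i.e. a 40% probability, and the false arm the remaining 60%.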
diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll
index 5e7c636..a3483af 100644
--- a/llvm/test/Transforms/InstSimplify/ptrmask.ll
+++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll
@@ -158,6 +158,26 @@ define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) {
ret ptr addrspace(1) %r
}
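+; In the two tests below the mask is the pointer's own address (produced by
+; ptrtoaddr), and masking an address with itself is a no-op, so each
+; llvm.ptrmask call folds away to the original pointer.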
+define ptr @ptrmask_simplify_ptrtoaddr(ptr %p) {
+; CHECK-LABEL: define ptr @ptrmask_simplify_ptrtoaddr
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %m = ptrtoaddr ptr %p to i64
+ %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
+ ret ptr %r
+}
+
+define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32(ptr addrspace(1) %p) {
+; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) {
+; CHECK-NEXT: ret ptr addrspace(1) [[P]]
+;
+ %m = ptrtoaddr ptr addrspace(1) %p to i32
+ %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m)
+ ret ptr addrspace(1) %r
+}
+
define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) {
; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused
; CHECK-SAME: (ptr align 64 [[P:%.*]]) {
diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
index fa55eab..bc63f79 100644
--- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
@@ -465,8 +465,7 @@ define void @peel_int_eq_condition(i32 %start) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[IV]], [[START]]
-; CHECK-NEXT: br i1 [[C_0]], label [[IF_THEN:%.*]], label [[LOOP_LATCH]]
+; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[LOOP_LATCH]]
; CHECK: if.then:
; CHECK-NEXT: call void @fn(i32 [[IV]])
; CHECK-NEXT: br label [[LOOP_LATCH]]
diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
index 616617b..5d5a610 100644
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -36,11 +36,11 @@ exit:
ret i32 %result
}
-define i32 @test2() {
+define i32 @test2() !prof !0 {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 0, 1
-; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0
+; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i32 [[RESULT_SROA_SPECULATED]]
;
entry:
@@ -53,7 +53,7 @@ entry:
%v1 = load i32, ptr %a1
%cond = icmp sle i32 %v0, %v1
- %select = select i1 %cond, ptr %a1, ptr %a
+ %select = select i1 %cond, ptr %a1, ptr %a, !prof !1
%result = load i32, ptr %select
ret i32 %result
@@ -870,3 +870,17 @@ define i8 @volatile_select(ptr %p, i1 %b) {
%v2 = load i8, ptr %px
ret i8 %v2
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5}
+;.
+; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address }
+;.
+; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address }
+;.
+; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
+;.
+; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-MODIFY-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
+;.
diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll
index 776624c..45c3bbd 100644
--- a/llvm/test/Transforms/SROA/phi-gep.ll
+++ b/llvm/test/Transforms/SROA/phi-gep.ll
@@ -1,9 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
%pair = type { i32, i32 }
+;.
+; CHECK: @g = global %pair zeroinitializer, align 4
+;.
define i32 @test_sroa_phi_gep(i1 %cond) {
; CHECK-LABEL: @test_sroa_phi_gep(
; CHECK-NEXT: entry:
@@ -334,18 +337,18 @@ exit:
unreachable
}
-define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) {
+define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) !prof !0 {
; CHECK-LABEL: @test_sroa_gep_phi_select_same_block(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[PAIR:%.*]], align 8
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]]
+; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i64 1
; CHECK-NEXT: [[PTR_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PTR]], i64 1
-; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]]
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]]
+; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]], !prof [[PROF2:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -355,9 +358,9 @@ entry:
while.body:
%phi = phi ptr [ %alloca, %entry ], [ %select, %while.body ]
- %select = select i1 %c1, ptr %phi, ptr %ptr
+ %select = select i1 %c1, ptr %phi, ptr %ptr, !prof !1
%gep = getelementptr inbounds %pair, ptr %select, i64 1
- br i1 %c2, label %exit, label %while.body
+ br i1 %c2, label %exit, label %while.body, !prof !2
exit:
ret void
@@ -747,6 +750,18 @@ declare ptr @foo()
declare i32 @__gxx_personality_v0(...)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5}
+!2 = !{!"branch_weights", i32 7, i32 11}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 7, i32 11}
+;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-MODIFY-CFG: {{.*}}
; CHECK-PRESERVE-CFG: {{.*}}
diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll
index b48b0f7..a701d78 100644
--- a/llvm/test/Transforms/SROA/select-gep.ll
+++ b/llvm/test/Transforms/SROA/select-gep.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
@@ -203,10 +203,10 @@ define i32 @test_select_idx_mem2reg(i1 %c) {
; Test gep with a select-like zext index unfolding on an alloca that is
; splittable and promotable.
-define i64 @test_select_like_zext_idx_mem2reg(i1 %c) {
+define i64 @test_select_like_zext_idx_mem2reg(i1 %c) !prof !0 {
; CHECK-LABEL: @test_select_like_zext_idx_mem2reg(
; CHECK-NEXT: [[IDX:%.*]] = zext i1 [[C:%.*]] to i64
-; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: ret i64 [[RES]]
;
%alloca = alloca [2 x i64], align 8
@@ -352,3 +352,16 @@ define i32 @test_select_idx_not_constant3(i1 %c, ptr %p, i64 %arg) {
%res = load i32, ptr %gep, align 4
ret i32 %res
}
+
+!0 = !{!"function_entry_count", i32 10}
+;.
+; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
+; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-MODIFY-CFG: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml
new file mode 100644
index 0000000..e748eecf
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml
@@ -0,0 +1,27 @@
+# RUN: yaml2obj %s -o %t.dxbc
+# RUN: not llvm-objcopy --dump-section=FKE0=%t.fek0 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-ZEROSIZE -DFILE=%t.fek0
+# RUN: not llvm-objcopy --dump-section=FKE3=%t.fek1 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING -DFILE=%t.fek1
+# RUN: not llvm-objcopy --dump-section=FKE2=%t/does_not_exist/.fek2 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-BAD-PATH -DFILE=%t/does_not_exist/.fek2 -DMSG=%errc_ENOENT
+
+# CHECK-ZEROSIZE: error: '[[FILE]]': part 'FKE0' is empty
+# CHECK-MISSING: error: '[[FILE]]': part 'FKE3' not found
+# CHECK-BAD-PATH: error: '[[FILE]]': [[MSG]]
+
+--- !dxcontainer
+Header:
+ Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ]
+ Version:
+ Major: 1
+ Minor: 0
+ FileSize: 108
+ PartCount: 3
+ PartOffsets: [ 60, 68, 76 ]
+Parts:
+ - Name: FKE0
+ Size: 0
+ - Name: FKE1
+ Size: 0
+ - Name: FKE2
+ Size: 8
+...
diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml
new file mode 100644
index 0000000..7d80a2c
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml
@@ -0,0 +1,278 @@
+# RUN: yaml2obj %s -o %t.dxbc
+# RUN: llvm-objcopy --dump-section=DXIL=%t.bc %t.dxbc
+# RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=BITCODE
+# RUN: wc -c %t.bc | FileCheck %s --check-prefix=DXIL-SIZE
+
+## Verify that when dumping the DXIL part we get a valid bitcode file.
+# BITCODE: define void @main()
+## Verify the size of the bitcode data.
+# DXIL-SIZE: 1708
+
+## Dump the PSV0 part and verify its size.
+# RUN: llvm-objcopy --dump-section=PSV0=%t.psv0 %t.dxbc
+# RUN: wc -c %t.psv0 | FileCheck %s --check-prefix=PSV0-SIZE
+# RUN: od -v -Ax -t x1 %t.psv0 | FileCheck %s --check-prefix=PSV0-CONTENTS
+# PSV0-SIZE: 76
+
+# For a compute shader, the structure size is encoded first, followed by
+# zeroed bytes up to the unused wave size min and max (0x0 and 0xFFFFFFFF),
+# and then the shader stage (5 for compute).
+# TODO: Update this test to use objdump or obj2yaml once we support
+# --add-section in objcopy. See issue:
+# https://github.com/llvm/llvm-project/issues/162159.
+# PSV0-CONTENTS: 0000 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+# PSV0-CONTENTS: 0010 00 00 00 00 00 00 00 00 ff ff ff ff 05 00 00 00
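+# A worked read of the two lines above (little-endian u32 fields; offsets are
+# inferred from the YAML below, so treat this as a sketch): offset 0x00 holds
+# 0x34 (the 52-byte info structure size), 0x14 holds 0 (MinimumWaveLaneCount),
+# 0x18 holds 0xFFFFFFFF (MaximumWaveLaneCount), and 0x1C holds 5 (ShaderStage).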
+
+--- !dxcontainer
+Header:
+ Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ]
+ Version:
+ Major: 1
+ Minor: 0
+ FileSize: 1872
+ PartCount: 2
+ PartOffsets: [ 40, 1780 ]
+Parts:
+ - Name: DXIL
+ Size: 1732
+ Program:
+ MajorVersion: 6
+ MinorVersion: 0
+ ShaderKind: 5
+ Size: 433
+ DXILMajorVersion: 1
+ DXILMinorVersion: 0
+ DXILSize: 1708
+ DXIL: [ 0x42, 0x43, 0xC0, 0xDE, 0x21, 0xC, 0x0, 0x0, 0xA8,
+ 0x1, 0x0, 0x0, 0xB, 0x82, 0x20, 0x0, 0x2, 0x0,
+ 0x0, 0x0, 0x13, 0x0, 0x0, 0x0, 0x7, 0x81, 0x23,
+ 0x91, 0x41, 0xC8, 0x4, 0x49, 0x6, 0x10, 0x32,
+ 0x39, 0x92, 0x1, 0x84, 0xC, 0x25, 0x5, 0x8, 0x19,
+ 0x1E, 0x4, 0x8B, 0x62, 0x80, 0x10, 0x45, 0x2,
+ 0x42, 0x92, 0xB, 0x42, 0x84, 0x10, 0x32, 0x14,
+ 0x38, 0x8, 0x18, 0x4B, 0xA, 0x32, 0x42, 0x88,
+ 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xA5,
+ 0x0, 0x19, 0x32, 0x42, 0xE4, 0x48, 0xE, 0x90,
+ 0x11, 0x22, 0xC4, 0x50, 0x41, 0x51, 0x81, 0x8C,
+ 0xE1, 0x83, 0xE5, 0x8A, 0x4, 0x21, 0x46, 0x6,
+ 0x51, 0x18, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x1B,
+ 0x90, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xC0,
+ 0x1, 0x24, 0x80, 0x2, 0x0, 0x0, 0x0, 0x49, 0x18,
+ 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x13, 0x82, 0x0,
+ 0x0, 0x89, 0x20, 0x0, 0x0, 0x11, 0x0, 0x0, 0x0,
+ 0x32, 0x22, 0x8, 0x9, 0x20, 0x64, 0x85, 0x4, 0x13,
+ 0x22, 0xA4, 0x84, 0x4, 0x13, 0x22, 0xE3, 0x84,
+ 0xA1, 0x90, 0x14, 0x12, 0x4C, 0x88, 0x8C, 0xB,
+ 0x84, 0x84, 0x4C, 0x10, 0x20, 0x73, 0x4, 0x8,
+ 0xC1, 0x65, 0xC3, 0x85, 0x2C, 0xE8, 0x3, 0x40,
+ 0x14, 0x91, 0x4E, 0xD1, 0x4A, 0x48, 0x44, 0x54,
+ 0x11, 0xC3, 0x9, 0x30, 0xC4, 0x18, 0x1, 0x30,
+ 0x2, 0x50, 0x82, 0x21, 0x1A, 0x8, 0x98, 0x23,
+ 0x0, 0x3, 0x0, 0x13, 0x14, 0x72, 0xC0, 0x87, 0x74,
+ 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x3,
+ 0x72, 0xC0, 0x87, 0xD, 0xAE, 0x50, 0xE, 0x6D,
+ 0xD0, 0xE, 0x7A, 0x50, 0xE, 0x6D, 0x0, 0xF, 0x7A,
+ 0x30, 0x7, 0x72, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D,
+ 0x90, 0xE, 0x71, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D,
+ 0x90, 0xE, 0x78, 0xA0, 0x7, 0x78, 0xD0, 0x6, 0xE9,
+ 0x10, 0x7, 0x76, 0xA0, 0x7, 0x71, 0x60, 0x7, 0x6D,
+ 0x90, 0xE, 0x73, 0x20, 0x7, 0x7A, 0x30, 0x7, 0x72,
+ 0xD0, 0x6, 0xE9, 0x60, 0x7, 0x74, 0xA0, 0x7, 0x76,
+ 0x40, 0x7, 0x6D, 0x60, 0xE, 0x71, 0x60, 0x7, 0x7A,
+ 0x10, 0x7, 0x76, 0xD0, 0x6, 0xE6, 0x30, 0x7, 0x72,
+ 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, 0x60, 0xE, 0x76,
+ 0x40, 0x7, 0x7A, 0x60, 0x7, 0x74, 0xD0, 0x6, 0xEE,
+ 0x80, 0x7, 0x7A, 0x10, 0x7, 0x76, 0xA0, 0x7, 0x73,
+ 0x20, 0x7, 0x7A, 0x60, 0x7, 0x74, 0x30, 0xE4,
+ 0x21, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x2, 0x0,
+ 0x0, 0x0, 0x20, 0xB, 0x4, 0x7, 0x0, 0x0, 0x0,
+ 0x32, 0x1E, 0x98, 0xC, 0x19, 0x11, 0x4C, 0x90,
+ 0x8C, 0x9, 0x26, 0x47, 0xC6, 0x4, 0x43, 0xBA,
+ 0x12, 0x28, 0x88, 0x62, 0x18, 0x1, 0x28, 0x84,
+ 0x22, 0x0, 0x0, 0x0, 0x79, 0x18, 0x0, 0x0, 0xE5,
+ 0x0, 0x0, 0x0, 0x33, 0x8, 0x80, 0x1C, 0xC4, 0xE1,
+ 0x1C, 0x66, 0x14, 0x1, 0x3D, 0x88, 0x43, 0x38,
+ 0x84, 0xC3, 0x8C, 0x42, 0x80, 0x7, 0x79, 0x78,
+ 0x7, 0x73, 0x98, 0x71, 0xC, 0xE6, 0x0, 0xF, 0xED,
+ 0x10, 0xE, 0xF4, 0x80, 0xE, 0x33, 0xC, 0x42, 0x1E,
+ 0xC2, 0xC1, 0x1D, 0xCE, 0xA1, 0x1C, 0x66, 0x30,
+ 0x5, 0x3D, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1B,
+ 0xCC, 0x3, 0x3D, 0xC8, 0x43, 0x3D, 0x8C, 0x3,
+ 0x3D, 0xCC, 0x78, 0x8C, 0x74, 0x70, 0x7, 0x7B,
+ 0x8, 0x7, 0x79, 0x48, 0x87, 0x70, 0x70, 0x7, 0x7A,
+ 0x70, 0x3, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87,
+ 0x19, 0xCC, 0x11, 0xE, 0xEC, 0x90, 0xE, 0xE1,
+ 0x30, 0xF, 0x6E, 0x30, 0xF, 0xE3, 0xF0, 0xE, 0xF0,
+ 0x50, 0xE, 0x33, 0x10, 0xC4, 0x1D, 0xDE, 0x21,
+ 0x1C, 0xD8, 0x21, 0x1D, 0xC2, 0x61, 0x1E, 0x66,
+ 0x30, 0x89, 0x3B, 0xBC, 0x83, 0x3B, 0xD0, 0x43,
+ 0x39, 0xB4, 0x3, 0x3C, 0xBC, 0x83, 0x3C, 0x84,
+ 0x3, 0x3B, 0xCC, 0xF0, 0x14, 0x76, 0x60, 0x7,
+ 0x7B, 0x68, 0x7, 0x37, 0x68, 0x87, 0x72, 0x68,
+ 0x7, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70,
+ 0x60, 0x7, 0x76, 0x28, 0x7, 0x76, 0xF8, 0x5, 0x76,
+ 0x78, 0x87, 0x77, 0x80, 0x87, 0x5F, 0x8, 0x87,
+ 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98,
+ 0x81, 0x2C, 0xEE, 0xF0, 0xE, 0xEE, 0xE0, 0xE,
+ 0xF5, 0xC0, 0xE, 0xEC, 0x30, 0x3, 0x62, 0xC8,
+ 0xA1, 0x1C, 0xE4, 0xA1, 0x1C, 0xCC, 0xA1, 0x1C,
+ 0xE4, 0xA1, 0x1C, 0xDC, 0x61, 0x1C, 0xCA, 0x21,
+ 0x1C, 0xC4, 0x81, 0x1D, 0xCA, 0x61, 0x6, 0xD6,
+ 0x90, 0x43, 0x39, 0xC8, 0x43, 0x39, 0x98, 0x43,
+ 0x39, 0xC8, 0x43, 0x39, 0xB8, 0xC3, 0x38, 0x94,
+ 0x43, 0x38, 0x88, 0x3, 0x3B, 0x94, 0xC3, 0x2F,
+ 0xBC, 0x83, 0x3C, 0xFC, 0x82, 0x3B, 0xD4, 0x3,
+ 0x3B, 0xB0, 0xC3, 0xC, 0xC7, 0x69, 0x87, 0x70,
+ 0x58, 0x87, 0x72, 0x70, 0x83, 0x74, 0x68, 0x7,
+ 0x78, 0x60, 0x87, 0x74, 0x18, 0x87, 0x74, 0xA0,
+ 0x87, 0x19, 0xCE, 0x53, 0xF, 0xEE, 0x0, 0xF, 0xF2,
+ 0x50, 0xE, 0xE4, 0x90, 0xE, 0xE3, 0x40, 0xF, 0xE1,
+ 0x20, 0xE, 0xEC, 0x50, 0xE, 0x33, 0x20, 0x28,
+ 0x1D, 0xDC, 0xC1, 0x1E, 0xC2, 0x41, 0x1E, 0xD2,
+ 0x21, 0x1C, 0xDC, 0x81, 0x1E, 0xDC, 0xE0, 0x1C,
+ 0xE4, 0xE1, 0x1D, 0xEA, 0x1, 0x1E, 0x66, 0x18,
+ 0x51, 0x38, 0xB0, 0x43, 0x3A, 0x9C, 0x83, 0x3B,
+ 0xCC, 0x50, 0x24, 0x76, 0x60, 0x7, 0x7B, 0x68,
+ 0x7, 0x37, 0x60, 0x87, 0x77, 0x78, 0x7, 0x78,
+ 0x98, 0x51, 0x4C, 0xF4, 0x90, 0xF, 0xF0, 0x50,
+ 0xE, 0x33, 0x1E, 0x6A, 0x1E, 0xCA, 0x61, 0x1C,
+ 0xE8, 0x21, 0x1D, 0xDE, 0xC1, 0x1D, 0x7E, 0x1,
+ 0x1E, 0xE4, 0xA1, 0x1C, 0xCC, 0x21, 0x1D, 0xF0,
+ 0x61, 0x6, 0x54, 0x85, 0x83, 0x38, 0xCC, 0xC3,
+ 0x3B, 0xB0, 0x43, 0x3D, 0xD0, 0x43, 0x39, 0xFC,
+ 0xC2, 0x3C, 0xE4, 0x43, 0x3B, 0x88, 0xC3, 0x3B,
+ 0xB0, 0xC3, 0x8C, 0xC5, 0xA, 0x87, 0x79, 0x98,
+ 0x87, 0x77, 0x18, 0x87, 0x74, 0x8, 0x7, 0x7A,
+ 0x28, 0x7, 0x72, 0x98, 0x81, 0x5C, 0xE3, 0x10,
+ 0xE, 0xEC, 0xC0, 0xE, 0xE5, 0x50, 0xE, 0xF3, 0x30,
+ 0x23, 0xC1, 0xD2, 0x41, 0x1E, 0xE4, 0xE1, 0x17,
+ 0xD8, 0xE1, 0x1D, 0xDE, 0x1, 0x1E, 0x66, 0x48,
+ 0x19, 0x3B, 0xB0, 0x83, 0x3D, 0xB4, 0x83, 0x1B,
+ 0x84, 0xC3, 0x38, 0x8C, 0x43, 0x39, 0xCC, 0xC3,
+ 0x3C, 0xB8, 0xC1, 0x39, 0xC8, 0xC3, 0x3B, 0xD4,
+ 0x3, 0x3C, 0xCC, 0x48, 0xB4, 0x71, 0x8, 0x7, 0x76,
+ 0x60, 0x7, 0x71, 0x8, 0x87, 0x71, 0x58, 0x87,
+ 0x19, 0xDB, 0xC6, 0xE, 0xEC, 0x60, 0xF, 0xED,
+ 0xE0, 0x6, 0xF0, 0x20, 0xF, 0xE5, 0x30, 0xF, 0xE5,
+ 0x20, 0xF, 0xF6, 0x50, 0xE, 0x6E, 0x10, 0xE, 0xE3,
+ 0x30, 0xE, 0xE5, 0x30, 0xF, 0xF3, 0xE0, 0x6, 0xE9,
+ 0xE0, 0xE, 0xE4, 0x50, 0xE, 0xF8, 0x30, 0x23,
+ 0xE2, 0xEC, 0x61, 0x1C, 0xC2, 0x81, 0x1D, 0xD8,
+ 0xE1, 0x17, 0xEC, 0x21, 0x1D, 0xE6, 0x21, 0x1D,
+ 0xC4, 0x21, 0x1D, 0xD8, 0x21, 0x1D, 0xE8, 0x21,
+ 0x1F, 0x66, 0x20, 0x9D, 0x3B, 0xBC, 0x43, 0x3D,
+ 0xB8, 0x3, 0x39, 0x94, 0x83, 0x39, 0xCC, 0x58,
+ 0xBC, 0x70, 0x70, 0x7, 0x77, 0x78, 0x7, 0x7A,
+ 0x8, 0x7, 0x7A, 0x48, 0x87, 0x77, 0x70, 0x87,
+ 0x19, 0xCB, 0xE7, 0xE, 0xEF, 0x30, 0xF, 0xE1,
+ 0xE0, 0xE, 0xE9, 0x40, 0xF, 0xE9, 0xA0, 0xF, 0xE5,
+ 0x30, 0xC3, 0x1, 0x3, 0x73, 0xA8, 0x7, 0x77, 0x18,
+ 0x87, 0x5F, 0x98, 0x87, 0x70, 0x70, 0x87, 0x74,
+ 0xA0, 0x87, 0x74, 0xD0, 0x87, 0x72, 0x98, 0x81,
+ 0x84, 0x41, 0x39, 0xE0, 0xC3, 0x38, 0xB0, 0x43,
+ 0x3D, 0x90, 0x43, 0x39, 0xCC, 0x40, 0xC4, 0xA0,
+ 0x1D, 0xCA, 0xA1, 0x1D, 0xE0, 0x41, 0x1E, 0xDE,
+ 0xC1, 0x1C, 0x66, 0x24, 0x63, 0x30, 0xE, 0xE1,
+ 0xC0, 0xE, 0xEC, 0x30, 0xF, 0xE9, 0x40, 0xF, 0xE5,
+ 0x30, 0x43, 0x21, 0x83, 0x75, 0x18, 0x7, 0x73,
+ 0x48, 0x87, 0x5F, 0xA0, 0x87, 0x7C, 0x80, 0x87,
+ 0x72, 0x98, 0xB1, 0x94, 0x1, 0x3C, 0x8C, 0xC3,
+ 0x3C, 0x94, 0xC3, 0x38, 0xD0, 0x43, 0x3A, 0xBC,
+ 0x83, 0x3B, 0xCC, 0xC3, 0x8C, 0xC5, 0xC, 0x48,
+ 0x21, 0x15, 0x42, 0x61, 0x1E, 0xE6, 0x21, 0x1D,
+ 0xCE, 0xC1, 0x1D, 0x52, 0x81, 0x14, 0x66, 0x4C,
+ 0x67, 0x30, 0xE, 0xEF, 0x20, 0xF, 0xEF, 0xE0,
+ 0x6, 0xEF, 0x50, 0xF, 0xF4, 0x30, 0xF, 0xE9, 0x40,
+ 0xE, 0xE5, 0xE0, 0x6, 0xE6, 0x20, 0xF, 0xE1, 0xD0,
+ 0xE, 0xE5, 0x30, 0xA3, 0x40, 0x83, 0x76, 0x68,
+ 0x7, 0x79, 0x8, 0x87, 0x19, 0x52, 0x1A, 0xB8,
+ 0xC3, 0x3B, 0x84, 0x3, 0x3B, 0xA4, 0x43, 0x38,
+ 0xCC, 0x83, 0x1B, 0x84, 0x3, 0x39, 0x90, 0x83,
+ 0x3C, 0xCC, 0x3, 0x3C, 0x84, 0xC3, 0x38, 0x94,
+ 0xC3, 0xC, 0x46, 0xD, 0xC6, 0x21, 0x1C, 0xD8,
+ 0x81, 0x1D, 0xCA, 0xA1, 0x1C, 0x7E, 0x81, 0x1E,
+ 0xF2, 0x1, 0x1E, 0xCA, 0x61, 0xC6, 0xB1, 0x6,
+ 0xEE, 0xF0, 0xE, 0xE6, 0x20, 0xF, 0xE5, 0x50,
+ 0xE, 0x33, 0x1C, 0x36, 0x20, 0x7, 0x7C, 0x70,
+ 0x3, 0x77, 0x78, 0x7, 0x77, 0xA8, 0x7, 0x77, 0x48,
+ 0x7, 0x73, 0x78, 0x7, 0x79, 0x68, 0x87, 0x19,
+ 0x55, 0x1B, 0x90, 0x3, 0x3E, 0xB8, 0xC1, 0x38,
+ 0xBC, 0x83, 0x3B, 0xD0, 0x83, 0x3C, 0xBC, 0x3,
+ 0x3B, 0x98, 0x3, 0x3B, 0xBC, 0xC3, 0x3D, 0xB8,
+ 0x1, 0x3A, 0xA4, 0x83, 0x3B, 0xD0, 0xC3, 0x3C,
+ 0xCC, 0x58, 0xDC, 0x80, 0x1C, 0xF0, 0xC1, 0xD,
+ 0xE0, 0x41, 0x1E, 0xCA, 0x61, 0x1C, 0xD2, 0x61,
+ 0x1E, 0xCA, 0x1, 0x0, 0x79, 0x28, 0x0, 0x0, 0x52,
+ 0x0, 0x0, 0x0, 0xC2, 0x3C, 0x90, 0x40, 0x86, 0x10,
+ 0x19, 0x32, 0xE2, 0x64, 0x90, 0x40, 0x46, 0x2,
+ 0x19, 0x23, 0x23, 0x46, 0x2, 0x13, 0x24, 0xC6,
+ 0x0, 0x13, 0x74, 0xCE, 0x61, 0x8C, 0x2D, 0xCC,
+ 0xED, 0xC, 0xC4, 0xAE, 0x4C, 0x6E, 0x2E, 0xED,
+ 0xCD, 0xD, 0x44, 0x46, 0xC6, 0x5, 0xC6, 0x5, 0xE6,
+ 0x2C, 0x8D, 0xE, 0x4, 0xE5, 0x2C, 0x8D, 0xE, 0xE8,
+ 0x2C, 0x8D, 0xE, 0xAD, 0x4E, 0xCC, 0x65, 0xEC,
+ 0xAD, 0x4D, 0x87, 0x8D, 0xCD, 0xAE, 0xED, 0x85,
+ 0x8D, 0xCD, 0xAE, 0xAD, 0x5, 0x4E, 0xEE, 0x4D,
+ 0xAD, 0x6C, 0x8C, 0xCE, 0xE5, 0x2C, 0x8D, 0xE,
+ 0xA4, 0xEC, 0xC6, 0x86, 0xA6, 0x2C, 0x26, 0x7,
+ 0xA6, 0xAC, 0xC, 0x26, 0x26, 0xE7, 0x46, 0x6C,
+ 0x2C, 0xA6, 0xC, 0x66, 0xA6, 0x6C, 0xC6, 0x4C,
+ 0x6, 0x86, 0x6C, 0x4C, 0x6, 0x46, 0xCC, 0x66,
+ 0x6C, 0x2C, 0xC, 0x27, 0x46, 0x6C, 0x86, 0x6C,
+ 0x2C, 0xE5, 0x8, 0x63, 0x73, 0x87, 0x68, 0xB,
+ 0x4B, 0x73, 0x3B, 0xCA, 0xDD, 0x18, 0x5A, 0x98,
+ 0xDC, 0xD7, 0x5C, 0x9A, 0x5E, 0xD9, 0x69, 0xCC,
+ 0xE4, 0xC2, 0xDA, 0xCA, 0x5A, 0xE0, 0xDE, 0xD2,
+ 0xDC, 0xE8, 0xCA, 0xE4, 0x86, 0x20, 0x1C, 0xC1,
+ 0x10, 0x84, 0x43, 0x18, 0x82, 0x70, 0xC, 0x43,
+ 0x10, 0xE, 0x62, 0x8, 0x42, 0x1, 0xC, 0x41, 0x38,
+ 0x8A, 0x21, 0x8, 0x87, 0x31, 0x6, 0xC1, 0x38,
+ 0xC6, 0x10, 0x4, 0x63, 0x18, 0x4, 0x24, 0x19,
+ 0x83, 0x60, 0x24, 0x63, 0x18, 0xC, 0xC3, 0x18,
+ 0x83, 0xB0, 0x44, 0x63, 0x28, 0x94, 0x1, 0x0,
+ 0xA4, 0x31, 0xC, 0x6, 0xB1, 0x8C, 0x61, 0x60,
+ 0xA, 0xC6, 0x24, 0x64, 0x78, 0x2E, 0x76, 0x61,
+ 0x6C, 0x76, 0x65, 0x72, 0x43, 0x9, 0x18, 0xA3,
+ 0xB0, 0xB1, 0xD9, 0xB5, 0xB9, 0xA4, 0x91, 0x95,
+ 0xB9, 0xD1, 0xD, 0x25, 0x68, 0x8C, 0x43, 0x86,
+ 0xE7, 0x32, 0x87, 0x16, 0x46, 0x56, 0x26, 0xD7,
+ 0xF4, 0x46, 0x56, 0xC6, 0x36, 0x94, 0xC0, 0x31,
+ 0xA, 0x19, 0x9E, 0x8B, 0x5D, 0x99, 0xDC, 0x5C,
+ 0xDA, 0x9B, 0xDB, 0x50, 0x82, 0xC7, 0x38, 0x64,
+ 0x78, 0x2E, 0x65, 0x6E, 0x74, 0x72, 0x79, 0x50,
+ 0x6F, 0x69, 0x6E, 0x74, 0x73, 0x43, 0x9, 0x24,
+ 0x13, 0xB1, 0xB1, 0xD9, 0xB5, 0xB9, 0xB4, 0xBD,
+ 0x91, 0xD5, 0xB1, 0x95, 0xB9, 0x98, 0xB1, 0x85,
+ 0x9D, 0xCD, 0xD, 0x45, 0x98, 0x28, 0x0, 0x71,
+ 0x20, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x6, 0x40,
+ 0x30, 0x0, 0xD2, 0x0, 0x0, 0x0, 0x61, 0x20, 0x0,
+ 0x0, 0x6, 0x0, 0x0, 0x0, 0x13, 0x4, 0x1, 0x86,
+ 0x3, 0x1, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x7, 0x50,
+ 0x10, 0xCD, 0x14, 0x61, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0 ]
+ - Name: PSV0
+ Size: 76
+ PSVInfo:
+ Version: 3
+ ShaderStage: 5
+ MinimumWaveLaneCount: 0
+ MaximumWaveLaneCount: 4294967295
+ UsesViewID: 0
+ SigInputVectors: 0
+ SigOutputVectors: [ 0, 0, 0, 0 ]
+ NumThreadsX: 1
+ NumThreadsY: 1
+ NumThreadsZ: 1
+ EntryName: main
+ ResourceStride: 24
+ Resources: []
+ SigInputElements: []
+ SigOutputElements: []
+ SigPatchOrPrimElements: []
+ InputOutputMap:
+ - [ ]
+ - [ ]
+ - [ ]
+ - [ ]
+...
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test
index fd1492f..bcffd40 100644
--- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test
@@ -34,6 +34,7 @@
# CHECK-NEXT: {
# CHECK-NEXT: ID: 0
# CHECK-NEXT: Offset: 0x0
+# CHECK-NEXT: Hash: 0x0
# CHECK-NEXT: Size: 0x1
# CHECK-NEXT: HasReturn: No
# CHECK-NEXT: HasTailCall: Yes
@@ -50,6 +51,7 @@
# CHECK-NEXT: ID: 2
# CHECK-NEXT: Offset: 0x3
# CHECK-NEXT: Callsite End Offsets: [1, 3]
+# CHECK-NEXT: Hash: 0x123
# CHECK-NEXT: Size: 0x7
# CHECK-NEXT: HasReturn: Yes
# CHECK-NEXT: HasTailCall: No
@@ -144,8 +146,8 @@ Sections:
ShSize: [[SIZE=<none>]]
Link: .text
Entries:
- - Version: 3
- Feature: 0x28
+ - Version: 4
+ Feature: 0x68
BBRanges:
- BaseAddress: [[ADDR=0x11111]]
BBEntries:
@@ -160,6 +162,7 @@ Sections:
Size: 0x4
Metadata: 0x15
CallsiteEndOffsets: [ 0x1 , 0x2 ]
+ Hash: 0x123
- Version: 2
BBRanges:
- BaseAddress: 0x22222
diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml
index dc14025..7a22efe 100644
--- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml
+++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml
@@ -162,6 +162,92 @@ Sections:
BBRanges:
- BaseAddress: 0x20
+## Check that obj2yaml can dump basic block hashes in the .llvm_bb_addr_map section.
+
+# RUN: yaml2obj --docnum=4 %s -o %t4
+# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=BBHASH
+
+# BBHASH: --- !ELF
+# BBHASH-NEXT: FileHeader:
+# BBHASH-NEXT: Class: ELFCLASS64
+# BBHASH-NEXT: Data: ELFDATA2LSB
+# BBHASH-NEXT: Type: ET_EXEC
+# BBHASH-NEXT: Sections:
+# BBHASH-NEXT: - Name: .llvm_bb_addr_map
+# BBHASH-NEXT: Type: SHT_LLVM_BB_ADDR_MAP
+# BBHASH-NEXT: Entries:
+# BBHASH-NEXT: - Version: 4
+# BBHASH-NEXT: Feature: 0x40
+# BBHASH-NEXT: BBRanges:
+# BBHASH-NEXT: - BBEntries:
+# BBHASH-NEXT: - ID: 0
+# BBHASH-NEXT: AddressOffset: 0x1
+# BBHASH-NEXT: Size: 0x2
+# BBHASH-NEXT: Metadata: 0x3
+# BBHASH-NEXT: Hash: 0x1
+# BBHASH-NEXT: - ID: 2
+# BBHASH-NEXT: AddressOffset: 0x4
+# BBHASH-NEXT: Size: 0x5
+# BBHASH-NEXT: Metadata: 0x6
+# BBHASH-NEXT: Hash: 0x2
+# BBHASH-NEXT: - ID: 4
+# BBHASH-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7
+# BBHASH-NEXT: Size: 0xFFFFFFFFFFFFFFF8
+# BBHASH-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9
+# BBHASH-NEXT: Hash: 0x3
+# BBHASH-NEXT: - Version: 4
+# BBHASH-NEXT: Feature: 0x68
+# BBHASH-NEXT: BBRanges:
+# BBHASH-NEXT: - BaseAddress: 0xFFFFFFFFFFFFFF20
+# BBHASH-NEXT: BBEntries:
+# BBHASH-NEXT: - ID: 6
+# BBHASH-NEXT: AddressOffset: 0xA
+# BBHASH-NEXT: Size: 0xB
+# BBHASH-NEXT: Metadata: 0xC
+# BBHASH-NEXT: CallsiteEndOffsets: [ 0x1, 0x2 ]
+# BBHASH-NEXT: Hash: 0x123
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+Sections:
+ - Name: .llvm_bb_addr_map
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 4
+ Feature: 0x40
+ BBRanges:
+ - BaseAddress: 0x0
+ BBEntries:
+ - ID: 0
+ AddressOffset: 0x1
+ Size: 0x2
+ Metadata: 0x3
+ Hash: 0x1
+ - ID: 2
+ AddressOffset: 0x4
+ Size: 0x5
+ Metadata: 0x6
+ Hash: 0x2
+ - ID: 4
+ AddressOffset: 0xFFFFFFFFFFFFFFF7
+ Size: 0xFFFFFFFFFFFFFFF8
+ Metadata: 0xFFFFFFFFFFFFFFF9
+ Hash: 0x3
+ - Version: 4
+ Feature: 0x68
+ BBRanges:
+ - BaseAddress: 0xFFFFFFFFFFFFFF20
+ BBEntries:
+ - ID: 6
+ AddressOffset: 0xA
+ Size: 0xB
+ Metadata: 0xC
+ CallsiteEndOffsets: [ 0x1, 0x2 ]
+ Hash: 0x123
+
## Check that obj2yaml uses the "Content" tag to describe an .llvm_bb_addr_map section
## when it can't extract the entries, for example, when the section is truncated, or
## when an invalid 'NumBlocks' or 'NumBBRanges' field is specified.
diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
index 418f90f..339e419 100644
--- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
@@ -72,6 +72,13 @@
# CHECK-NEXT: 0000: 03202000 00000000 0000010E 01000203
# CHECK-NEXT: )
+# Case 10: Specify basic block hash.
+# CHECK: Name: .llvm_bb_addr_map (1)
+# CHECK: SectionData (
+# CHECK-NEXT: 0000: 04602000 00000000 0000010E 01000203
+# CHECK-NEXT: 0010: 23010000 00000000
+# CHECK-NEXT: )
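+# A byte-by-byte sketch of the dump above, assuming the field order this
+# version's reader uses: 04 = version, 60 = feature (0x20 callsite end
+# offsets | 0x40 hash), 20 00 00 00 00 00 00 00 = base address 0x20, 01 = one
+# block, 0E = ID 14, 01 = address offset, 00 = no callsite end offsets,
+# 02 = size, 03 = metadata, then 23 01 00 00 00 00 00 00 = the fixed
+# 64-bit hash 0x123.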
+
--- !ELF
FileHeader:
@@ -176,6 +183,22 @@ Sections:
Metadata: 0x00000003
CallsiteEndOffsets: []
+## 10) We can produce a SHT_LLVM_BB_ADDR_MAP section with a basic block hash.
+ - Name: '.llvm_bb_addr_map (10)'
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Entries:
+ - Version: 4
+ Feature: 0x60
+ BBRanges:
+ - BaseAddress: 0x0000000000000020
+ BBEntries:
+ - ID: 14
+ AddressOffset: 0x00000001
+ Size: 0x00000002
+ Metadata: 0x00000003
+ CallsiteEndOffsets: []
+ Hash: 0x123
+
## Check we can't use Entries at the same time as either Content or Size.
# RUN: not yaml2obj --docnum=2 -DCONTENT="00" %s 2>&1 | FileCheck %s --check-prefix=INVALID
# RUN: not yaml2obj --docnum=2 -DSIZE="0" %s 2>&1 | FileCheck %s --check-prefix=INVALID
@@ -197,7 +220,7 @@ Sections:
## Check that yaml2obj generates a warning when we use unsupported versions.
# RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION
-# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 4; encoding using the most recent version
+# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version
--- !ELF
FileHeader:
@@ -209,4 +232,4 @@ Sections:
Type: SHT_LLVM_BB_ADDR_MAP
Entries:
## Specify unsupported version
- - Version: 4
+ - Version: 5
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index ab93316..9c9b2dd 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -8155,6 +8155,8 @@ void LLVMELFDumper<ELFT>::printBBAddrMaps(bool PrettyPGOAnalysis) {
W.printHex("Offset", BBE.Offset);
if (!BBE.CallsiteEndOffsets.empty())
W.printList("Callsite End Offsets", BBE.CallsiteEndOffsets);
+ if (PAM.FeatEnable.BBHash)
+ W.printHex("Hash", BBE.Hash);
W.printHex("Size", BBE.Size);
W.printBoolean("HasReturn", BBE.hasReturn());
W.printBoolean("HasTailCall", BBE.hasTailCall());
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index ef4552f..68e18f6 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -900,7 +900,7 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) {
while (Cur && Cur.tell() < Content.size()) {
if (Shdr->sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) {
Version = Data.getU8(Cur);
- if (Cur && Version > 3)
+ if (Cur && Version > 4)
return createStringError(
errc::invalid_argument,
"invalid SHT_LLVM_BB_ADDR_MAP section version: " +
@@ -946,8 +946,11 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) {
}
uint64_t Size = Data.getULEB128(Cur);
uint64_t Metadata = Data.getULEB128(Cur);
+ std::optional<llvm::yaml::Hex64> Hash;
+ if (FeatureOrErr->BBHash)
+ Hash = Data.getU64(Cur);
BBEntries.push_back(
- {ID, Offset, Size, Metadata, std::move(CallsiteEndOffsets)});
+ {ID, Offset, Size, Metadata, std::move(CallsiteEndOffsets), Hash});
}
TotalNumBlocks += BBEntries.size();
BBRanges.push_back({BaseAddress, /*NumBlocks=*/{}, BBEntries});
diff --git a/llvm/unittests/ADT/TypeTraitsTest.cpp b/llvm/unittests/ADT/TypeTraitsTest.cpp
index a56aa7e..f9b8d6d 100644
--- a/llvm/unittests/ADT/TypeTraitsTest.cpp
+++ b/llvm/unittests/ADT/TypeTraitsTest.cpp
@@ -40,9 +40,7 @@ struct Foo {
struct CheckMethodPointer : CheckFunctionTraits<decltype(&Foo::func)> {};
/// Test lambda references.
-LLVM_ATTRIBUTE_UNUSED auto lambdaFunc = [](const int &v) -> bool {
- return true;
-};
+[[maybe_unused]] auto lambdaFunc = [](const int &v) -> bool { return true; };
struct CheckLambda : CheckFunctionTraits<decltype(lambdaFunc)> {};
} // end anonymous namespace
diff --git a/llvm/unittests/IR/ConstantsTest.cpp b/llvm/unittests/IR/ConstantsTest.cpp
index 54c7ddd..6376165 100644
--- a/llvm/unittests/IR/ConstantsTest.cpp
+++ b/llvm/unittests/IR/ConstantsTest.cpp
@@ -564,13 +564,17 @@ TEST(ConstantsTest, FoldGlobalVariablePtr) {
Global->setAlignment(Align(4));
- ConstantInt *TheConstant(ConstantInt::get(IntType, 2));
+ ConstantInt *TheConstant = ConstantInt::get(IntType, 2);
- Constant *TheConstantExpr(ConstantExpr::getPtrToInt(Global.get(), IntType));
+ Constant *PtrToInt = ConstantExpr::getPtrToInt(Global.get(), IntType);
+ ASSERT_TRUE(
+ ConstantFoldBinaryInstruction(Instruction::And, PtrToInt, TheConstant)
+ ->isNullValue());
- ASSERT_TRUE(ConstantFoldBinaryInstruction(Instruction::And, TheConstantExpr,
- TheConstant)
- ->isNullValue());
+ Constant *PtrToAddr = ConstantExpr::getPtrToAddr(Global.get(), IntType);
+ ASSERT_TRUE(
+ ConstantFoldBinaryInstruction(Instruction::And, PtrToAddr, TheConstant)
+ ->isNullValue());
}
// Check that containsUndefOrPoisonElement and containsPoisonElement is working
diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp
index 17d9f50..d6a3ca5 100644
--- a/llvm/unittests/Object/ELFObjectFileTest.cpp
+++ b/llvm/unittests/Object/ELFObjectFileTest.cpp
@@ -531,7 +531,7 @@ Sections:
// Check that we can detect unsupported versions.
SmallString<128> UnsupportedVersionYamlString(CommonYamlString);
UnsupportedVersionYamlString += R"(
- - Version: 4
+ - Version: 5
BBRanges:
- BaseAddress: 0x11111
BBEntries:
@@ -543,7 +543,7 @@ Sections:
{
SCOPED_TRACE("unsupported version");
DoCheck(UnsupportedVersionYamlString,
- "unsupported SHT_LLVM_BB_ADDR_MAP version: 4");
+ "unsupported SHT_LLVM_BB_ADDR_MAP version: 5");
}
SmallString<128> ZeroBBRangesYamlString(CommonYamlString);
@@ -761,14 +761,14 @@ Sections:
BBAddrMap E1 = {
{{0x11111,
- {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}}}}}};
+ {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}, 0}}}}};
BBAddrMap E2 = {
- {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}},
- {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}}}}}};
+ {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}},
+ {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}, 0}}}}};
BBAddrMap E3 = {
- {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}}}}}};
+ {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0}}}}};
BBAddrMap E4 = {
- {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}}}}}};
+ {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0}}}}};
std::vector<BBAddrMap> Section0BBAddrMaps = {E4};
std::vector<BBAddrMap> Section1BBAddrMaps = {E3};
@@ -988,6 +988,123 @@ Sections:
}
}
+// Test for the ELFObjectFile::readBBAddrMap API with BBHash.
+TEST(ELFObjectFileTest, ReadBBHash) {
+ StringRef CommonYamlString(R"(
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+Sections:
+ - Name: .llvm_bb_addr_map_1
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: 1
+ Entries:
+ - Version: 4
+ Feature: 0x60
+ BBRanges:
+ - BaseAddress: 0x11111
+ BBEntries:
+ - ID: 1
+ AddressOffset: 0x0
+ Size: 0x1
+ Metadata: 0x2
+ CallsiteEndOffsets: [ 0x1 , 0x1 ]
+ Hash: 0x1
+ - Name: .llvm_bb_addr_map_2
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: 1
+ Entries:
+ - Version: 4
+ Feature: 0x48
+ BBRanges:
+ - BaseAddress: 0x22222
+ BBEntries:
+ - ID: 2
+ AddressOffset: 0x0
+ Size: 0x2
+ Metadata: 0x4
+ Hash: 0x2
+ - BaseAddress: 0xFFFFF
+ BBEntries:
+ - ID: 15
+ AddressOffset: 0xF0
+ Size: 0xF1
+ Metadata: 0x1F
+ Hash: 0xF
+ - Name: .llvm_bb_addr_map_3
+ Type: SHT_LLVM_BB_ADDR_MAP
+ Link: 2
+ Entries:
+ - Version: 4
+ Feature: 0x40
+ BBRanges:
+ - BaseAddress: 0x33333
+ BBEntries:
+ - ID: 0
+ AddressOffset: 0x0
+ Size: 0x3
+ Metadata: 0x6
+ Hash: 0x3
+ - Name: .llvm_bb_addr_map_4
+ Type: SHT_LLVM_BB_ADDR_MAP
+ # Link: 0 (by default, can be overridden)
+ Entries:
+ - Version: 4
+ Feature: 0x40
+ BBRanges:
+ - BaseAddress: 0x44444
+ BBEntries:
+ - ID: 0
+ AddressOffset: 0x0
+ Size: 0x4
+ Metadata: 0x18
+ Hash: 0x4
+)");
+
+ BBAddrMap E1 = {
+ {{0x11111,
+ {{1, 0x0, 0x3, {false, true, false, false, false}, {0x1, 0x2}, 0x1}}}}};
+ BBAddrMap E2 = {
+ {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0x2}}},
+ {0xFFFFF, {{15, 0xF0, 0xF1, {true, true, true, true, true}, {}, 0xF}}}}};
+ BBAddrMap E3 = {
+ {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0x3}}}}};
+ BBAddrMap E4 = {
+ {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0x4}}}}};
+
+ std::vector<BBAddrMap> Section0BBAddrMaps = {E4};
+ std::vector<BBAddrMap> Section1BBAddrMaps = {E3};
+ std::vector<BBAddrMap> Section2BBAddrMaps = {E1, E2};
+ std::vector<BBAddrMap> AllBBAddrMaps = {E1, E2, E3, E4};
+
+ auto DoCheckSucceeds = [&](StringRef YamlString,
+ std::optional<unsigned> TextSectionIndex,
+ std::vector<BBAddrMap> ExpectedResult) {
+ SCOPED_TRACE("for TextSectionIndex: " +
+ (TextSectionIndex ? llvm::Twine(*TextSectionIndex) : "{}") +
+ " and object yaml:\n" + YamlString);
+ SmallString<0> Storage;
+ Expected<ELFObjectFile<ELF64LE>> ElfOrErr =
+ toBinary<ELF64LE>(Storage, YamlString);
+ ASSERT_THAT_EXPECTED(ElfOrErr, Succeeded());
+
+ Expected<const typename ELF64LE::Shdr *> BBAddrMapSecOrErr =
+ ElfOrErr->getELFFile().getSection(1);
+ ASSERT_THAT_EXPECTED(BBAddrMapSecOrErr, Succeeded());
+ auto BBAddrMaps = ElfOrErr->readBBAddrMap(TextSectionIndex);
+ ASSERT_THAT_EXPECTED(BBAddrMaps, Succeeded());
+ EXPECT_EQ(*BBAddrMaps, ExpectedResult);
+ };
+
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/std::nullopt,
+ AllBBAddrMaps);
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/0, Section0BBAddrMaps);
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/2, Section1BBAddrMaps);
+ DoCheckSucceeds(CommonYamlString, /*TextSectionIndex=*/1, Section2BBAddrMaps);
+}
+
// Test for the ELFObjectFile::readBBAddrMap API with PGOAnalysisMap.
TEST(ELFObjectFileTest, ReadPGOAnalysisMap) {
StringRef CommonYamlString(R"(
@@ -1159,29 +1276,32 @@ Sections:
)");
BBAddrMap E1 = {
- {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}}}}}};
- PGOAnalysisMap P1 = {892, {}, {true, false, false, false, false, false}};
+ {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}, 0}}}}};
+ PGOAnalysisMap P1 = {
+ 892, {}, {true, false, false, false, false, false, false}};
BBAddrMap E2 = {
- {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}}}};
+ {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}};
PGOAnalysisMap P2 = {{},
{{BlockFrequency(343), {}}},
- {false, true, false, false, false, false}};
- BBAddrMap E3 = {{{0x33333,
- {{0, 0x0, 0x3, {false, true, true, false, false}, {}},
- {1, 0x3, 0x3, {false, false, true, false, false}, {}},
- {2, 0x6, 0x3, {false, false, false, false, false}, {}}}}}};
+ {false, true, false, false, false, false, false}};
+ BBAddrMap E3 = {
+ {{0x33333,
+ {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0},
+ {1, 0x3, 0x3, {false, false, true, false, false}, {}, 0},
+ {2, 0x6, 0x3, {false, false, false, false, false}, {}, 0}}}}};
PGOAnalysisMap P3 = {{},
{{{},
{{1, BranchProbability::getRaw(0x1111'1111)},
{2, BranchProbability::getRaw(0xeeee'eeee)}}},
{{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}},
{{}, {}}},
- {false, false, true, false, false, false}};
- BBAddrMap E4 = {{{0x44444,
- {{0, 0x0, 0x4, {false, false, false, true, true}, {}},
- {1, 0x4, 0x4, {false, false, false, false, false}, {}},
- {2, 0x8, 0x4, {false, false, false, false, false}, {}},
- {3, 0xc, 0x4, {false, false, false, false, false}, {}}}}}};
+ {false, false, true, false, false, false, false}};
+ BBAddrMap E4 = {
+ {{0x44444,
+ {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0},
+ {1, 0x4, 0x4, {false, false, false, false, false}, {}, 0},
+ {2, 0x8, 0x4, {false, false, false, false, false}, {}, 0},
+ {3, 0xc, 0x4, {false, false, false, false, false}, {}, 0}}}}};
PGOAnalysisMap P4 = {
1000,
{{BlockFrequency(1000),
@@ -1193,22 +1313,24 @@ Sections:
{3, BranchProbability::getRaw(0xeeee'eeee)}}},
{BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}},
{BlockFrequency(1000), {}}},
- {true, true, true, false, false, false}};
+ {true, true, true, false, false, false, false}};
BBAddrMap E5 = {
- {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}}}}}};
- PGOAnalysisMap P5 = {{}, {}, {false, false, false, false, false, false}};
+ {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}};
+ PGOAnalysisMap P5 = {
+ {}, {}, {false, false, false, false, false, false, false}};
BBAddrMap E6 = {
{{0x66666,
- {{0, 0x0, 0x6, {false, true, true, false, false}, {}},
- {1, 0x6, 0x6, {false, false, true, false, false}, {}}}},
- {0x666661, {{2, 0x0, 0x6, {false, false, false, false, false}, {}}}}}};
+ {{0, 0x0, 0x6, {false, true, true, false, false}, {}, 0},
+ {1, 0x6, 0x6, {false, false, true, false, false}, {}, 0}}},
+ {0x666661,
+ {{2, 0x0, 0x6, {false, false, false, false, false}, {}, 0}}}}};
PGOAnalysisMap P6 = {{},
{{{},
{{1, BranchProbability::getRaw(0x2222'2222)},
{2, BranchProbability::getRaw(0xcccc'cccc)}}},
{{}, {{2, BranchProbability::getRaw(0x8888'8888)}}},
{{}, {}}},
- {false, false, true, true, false, false}};
+ {false, false, true, true, false, false, false}};
std::vector<BBAddrMap> Section0BBAddrMaps = {E4, E5, E6};
std::vector<BBAddrMap> Section1BBAddrMaps = {E3};
diff --git a/llvm/unittests/Object/ELFTypesTest.cpp b/llvm/unittests/Object/ELFTypesTest.cpp
index f88931b5f..1765e15 100644
--- a/llvm/unittests/Object/ELFTypesTest.cpp
+++ b/llvm/unittests/Object/ELFTypesTest.cpp
@@ -101,21 +101,22 @@ static_assert(
"PGOAnalysisMap should use the same type for basic block ID as BBAddrMap");
TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) {
- const std::array<BBAddrMap::Features, 11> Decoded = {
- {{false, false, false, false, false, false},
- {true, false, false, false, false, false},
- {false, true, false, false, false, false},
- {false, false, true, false, false, false},
- {false, false, false, true, false, false},
- {true, true, false, false, false, false},
- {false, true, true, false, false, false},
- {false, true, true, true, false, false},
- {true, true, true, true, false, false},
- {false, false, false, false, true, false},
- {false, false, false, false, false, true}}};
- const std::array<uint8_t, 11> Encoded = {{0b0000, 0b0001, 0b0010, 0b0100,
- 0b1000, 0b0011, 0b0110, 0b1110,
- 0b1111, 0b1'0000, 0b10'0000}};
+ const std::array<BBAddrMap::Features, 12> Decoded = {
+ {{false, false, false, false, false, false, false},
+ {true, false, false, false, false, false, false},
+ {false, true, false, false, false, false, false},
+ {false, false, true, false, false, false, false},
+ {false, false, false, true, false, false, false},
+ {true, true, false, false, false, false, false},
+ {false, true, true, false, false, false, false},
+ {false, true, true, true, false, false, false},
+ {true, true, true, true, false, false, false},
+ {false, false, false, false, true, false, false},
+ {false, false, false, false, false, true, false},
+ {false, false, false, false, false, false, true}}};
+ const std::array<uint8_t, 12> Encoded = {
+ {0b0000, 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b0110, 0b1110, 0b1111,
+ 0b1'0000, 0b10'0000, 0b100'0000}};
for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded))
EXPECT_EQ(Feat.encode(), EncodedVal);
for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) {
@@ -128,9 +129,9 @@ TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) {
TEST(ELFTypesTest, BBAddrMapFeaturesInvalidEncodingTest) {
const std::array<std::string, 2> Errors = {
- "invalid encoding for BBAddrMap::Features: 0x40",
+ "invalid encoding for BBAddrMap::Features: 0x80",
"invalid encoding for BBAddrMap::Features: 0xf0"};
- const std::array<uint8_t, 2> Values = {{0b100'0000, 0b1111'0000}};
+ const std::array<uint8_t, 2> Values = {{0b1000'0000, 0b1111'0000}};
for (const auto &[Val, Error] : llvm::zip(Values, Errors)) {
EXPECT_THAT_ERROR(BBAddrMap::Features::decode(Val).takeError(),
FailedWithMessage(Error));
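To keep the feature bytes used across these tests straight, here is a minimal standalone C++ sketch (hypothetical constant names; the bit positions mirror the encoding table in the test above, where bit 3 enables multiple basic-block ranges, bit 5 callsite end offsets, and the new bit 6 the per-block hash):

#include <cstdint>
#include <cstdio>

int main() {
  // Bit positions taken from the Encoded array above; names are illustrative.
  const uint8_t MultiBBRange = 1u << 3;       // 0b0000'1000 = 0x08
  const uint8_t CallsiteEndOffsets = 1u << 5; // 0b0010'0000 = 0x20
  const uint8_t BBHash = 1u << 6;             // 0b0100'0000 = 0x40 (new)
  std::printf("hash only:             0x%02x\n", unsigned(BBHash));      // 0x40
  std::printf("hash+ranges:           0x%02x\n",
              unsigned(BBHash | MultiBBRange));                          // 0x48
  std::printf("hash+callsites:        0x%02x\n",
              unsigned(BBHash | CallsiteEndOffsets));                    // 0x60
  std::printf("hash+callsites+ranges: 0x%02x\n",
              unsigned(BBHash | CallsiteEndOffsets | MultiBBRange));     // 0x68
  return 0;
}

These are exactly the Feature values 0x40, 0x48, 0x60, and 0x68 that appear in the YAML test cases elsewhere in this patch.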
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index 5ac4c53..809960d 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -228,6 +228,114 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_SUB) {
EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u);
}
+TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_ADD) {
+ SDLoc Loc;
+ auto IntVT = EVT::getIntegerVT(Context, 8);
+ auto Nneg1 = DAG->getConstant(0xFF, Loc, IntVT);
+ auto N0 = DAG->getConstant(0x00, Loc, IntVT);
+ auto N1 = DAG->getConstant(0x01, Loc, IntVT);
+ auto N5 = DAG->getConstant(0x05, Loc, IntVT);
+ auto N8 = DAG->getConstant(0x08, Loc, IntVT);
+ auto Nsign1 = DAG->getConstant(0x55, Loc, IntVT);
+ auto UnknownOp = DAG->getRegister(0, IntVT);
+ auto Mask = DAG->getConstant(0x1e, Loc, IntVT);
+ auto Nsign3 = DAG->getNode(ISD::AND, Loc, IntVT, Mask, UnknownOp);
+ // RHS early out
+ // Nsign1 = 01010101
+ // Nsign3 = 000????0
+ auto OpRhsEo = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, Nsign1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpRhsEo), 1u);
+
+ // ADD 0 -1
+ // N0 = 00000000
+ // Nneg1 = 11111111
+ auto OpNegZero = DAG->getNode(ISD::ADD, Loc, IntVT, N0, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpNegZero), 8u);
+
+ // ADD 1 -1
+ // N1 = 00000001
+ // Nneg1 = 11111111
+ auto OpNegOne = DAG->getNode(ISD::ADD, Loc, IntVT, N1, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpNegOne), 8u);
+
+ // ADD 8 -1
+ // N8 = 00001000
+ // Nneg1 = 11111111
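+ // 8 + (-1) = 7 = 00000111, which has five identical leading bits.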
+ auto OpSeven = DAG->getNode(ISD::ADD, Loc, IntVT, N8, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpSeven), 5u);
+
+ // Non negative
+ // Nsign3 = 000????0
+ // Nneg1 = 11111111
+ auto OpNonNeg = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpNonNeg), 3u);
+
+ // LHS early out
+ // Nsign1 = 01010101
+ // Nsign3 = 000????0
+ auto OpLhsEo = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign1, Nsign3);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpLhsEo), 1u);
+
+ // Nsign3 = 000????0
+ // N5 = 00000101
+ auto Op = DAG->getNode(ISD::ADD, Loc, IntVT, Nsign3, N5);
+ EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u);
+}
+
+TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_ADDC) {
+ SDLoc Loc;
+ auto IntVT = EVT::getIntegerVT(Context, 8);
+ auto Nneg1 = DAG->getConstant(0xFF, Loc, IntVT);
+ auto N0 = DAG->getConstant(0x00, Loc, IntVT);
+ auto N1 = DAG->getConstant(0x01, Loc, IntVT);
+ auto N5 = DAG->getConstant(0x05, Loc, IntVT);
+ auto N8 = DAG->getConstant(0x08, Loc, IntVT);
+ auto Nsign1 = DAG->getConstant(0x55, Loc, IntVT);
+ auto UnknownOp = DAG->getRegister(0, IntVT);
+ auto Mask = DAG->getConstant(0x1e, Loc, IntVT);
+ auto Nsign3 = DAG->getNode(ISD::AND, Loc, IntVT, Mask, UnknownOp);
+ // RHS early out
+ // Nsign1 = 01010101
+ // Nsign3 = 000????0
+ auto OpRhsEo = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, Nsign1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpRhsEo), 1u);
+
+ // ADDC 0 -1
+ // N0 = 00000000
+ // Nneg1 = 11111111
+ auto OpNegZero = DAG->getNode(ISD::ADDC, Loc, IntVT, N0, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpNegZero), 8u);
+
+ // ADDC 1 -1
+ // N1 = 00000001
+ // Nneg1 = 11111111
+ auto OpNegOne = DAG->getNode(ISD::ADDC, Loc, IntVT, N1, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpNegOne), 8u);
+
+ // ADDC 8 -1
+ // N8 = 00001000
+ // Nneg1 = 11111111
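+ // With a possible carry-in, the sum may be 7 or 8 (00001000), so only
+ // four leading sign bits are guaranteed.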
+ auto OpSeven = DAG->getNode(ISD::ADDC, Loc, IntVT, N8, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpSeven), 4u);
+
+ // Non negative
+ // Nsign3 = 000????0
+ // Nneg1 = 11111111
+ auto OpNonNeg = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, Nneg1);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpNonNeg), 3u);
+
+ // LHS early out
+ // Nsign1 = 01010101
+ // Nsign3 = 000????0
+ auto OpLhsEo = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign1, Nsign3);
+ EXPECT_EQ(DAG->ComputeNumSignBits(OpLhsEo), 1u);
+
+ // Nsign3 = 000????0
+ // N5 = 00000101
+ auto Op = DAG->getNode(ISD::ADDC, Loc, IntVT, Nsign3, N5);
+ EXPECT_EQ(DAG->ComputeNumSignBits(Op), 2u);
+}
+
TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) {
TargetLowering TL(*TM);
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 343c2bb71..a5c5426 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -906,7 +906,6 @@ Transforms/InstCombine/select_frexp.ll
Transforms/InstCombine/select.ll
Transforms/InstCombine/select-min-max.ll
Transforms/InstCombine/select-of-symmetric-selects.ll
-Transforms/InstCombine/select-safe-impliedcond-transforms.ll
Transforms/InstCombine/select-safe-transforms.ll
Transforms/InstCombine/select-select.ll
Transforms/InstCombine/select-with-extreme-eq-cond.ll
@@ -1237,7 +1236,6 @@ Transforms/PartiallyInlineLibCalls/X86/good-prototype.ll
Transforms/PGOProfile/chr-dead-pred.ll
Transforms/PGOProfile/chr-dup-threshold.ll
Transforms/PGOProfile/chr-lifetimes.ll
-Transforms/PGOProfile/chr.ll
Transforms/PGOProfile/chr-poison.ll
Transforms/PGOProfile/comdat.ll
Transforms/PGOProfile/memop_profile_funclet_wasm.ll
@@ -1312,14 +1310,11 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
-Transforms/SROA/addrspacecast.ll
-Transforms/SROA/phi-and-select.ll
Transforms/SROA/phi-gep.ll
Transforms/SROA/scalable-vectors-with-known-vscale.ll
Transforms/SROA/select-gep.ll
Transforms/SROA/select-load.ll
Transforms/SROA/slice-width.ll
-Transforms/SROA/std-clamp.ll
Transforms/SROA/vector-conversion.ll
Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
Transforms/SROA/vector-promotion.ll