aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp6
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrGISel.td7
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp8
-rw-r--r--llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/MachineSMEABIPass.cpp108
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp4
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp15
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp12
-rw-r--r--llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp10
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp140
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h3
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrP10.td189
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp6
-rw-r--r--llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSiFive7.td20
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp1
-rw-r--r--llvm/lib/Target/Sparc/SparcFrameLowering.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp2
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp11
-rw-r--r--llvm/lib/Target/X86/X86.td5
-rw-r--r--llvm/lib/Target/X86/X86FloatingPoint.cpp3
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp7
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td13
31 files changed, 511 insertions, 95 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9926a4d..be2f2e4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16254,7 +16254,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
SplatVal > 1) {
SDValue Pg = getPredicateForScalableVector(DAG, DL, VT);
SDValue Res =
- DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
+ DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32));
if (Negated)
Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
@@ -22942,7 +22942,7 @@ static SDValue performIntrinsicCombine(SDNode *N,
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_asrd:
- return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
@@ -30047,7 +30047,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
SDValue Res =
- DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
+ DAG.getNode(AArch64ISD::ASRD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
if (Negated)
Res = DAG.getNode(ISD::SUB, DL, ContainerVT,
DAG.getConstant(0, DL, ContainerVT), Res);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7322212..fe84193 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -233,6 +233,12 @@ def G_SDOT : AArch64GenericInstruction {
let hasSideEffects = 0;
}
+def G_USDOT : AArch64GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
+ let hasSideEffects = 0;
+}
+
// Generic instruction for the BSP pseudo. It is expanded into BSP, which
// expands into BSL/BIT/BIF after register allocation.
def G_BSP : AArch64GenericInstruction {
@@ -278,6 +284,7 @@ def : GINodeEquiv<G_UADDLV, AArch64uaddlv>;
def : GINodeEquiv<G_UDOT, AArch64udot>;
def : GINodeEquiv<G_SDOT, AArch64sdot>;
+def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
index b3c9656..343fd81 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
@@ -40,7 +40,11 @@ yaml::AArch64FunctionInfo::AArch64FunctionInfo(
getSVEStackSize(MFI, &llvm::AArch64FunctionInfo::getStackSizePPR)),
HasStackFrame(MFI.hasStackFrame()
? std::optional<bool>(MFI.hasStackFrame())
- : std::nullopt) {}
+ : std::nullopt),
+ HasStreamingModeChanges(
+ MFI.hasStreamingModeChanges()
+ ? std::optional<bool>(MFI.hasStreamingModeChanges())
+ : std::nullopt) {}
void yaml::AArch64FunctionInfo::mappingImpl(yaml::IO &YamlIO) {
MappingTraits<AArch64FunctionInfo>::mapping(YamlIO, *this);
@@ -55,6 +59,8 @@ void AArch64FunctionInfo::initializeBaseYamlFields(
YamlMFI.StackSizePPR.value_or(0));
if (YamlMFI.HasStackFrame)
setHasStackFrame(*YamlMFI.HasStackFrame);
+ if (YamlMFI.HasStreamingModeChanges)
+ setHasStreamingModeChanges(*YamlMFI.HasStreamingModeChanges);
}
static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index bd0a17d..d1832f4 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -645,6 +645,7 @@ struct AArch64FunctionInfo final : public yaml::MachineFunctionInfo {
std::optional<uint64_t> StackSizeZPR;
std::optional<uint64_t> StackSizePPR;
std::optional<bool> HasStackFrame;
+ std::optional<bool> HasStreamingModeChanges;
AArch64FunctionInfo() = default;
AArch64FunctionInfo(const llvm::AArch64FunctionInfo &MFI);
@@ -659,6 +660,7 @@ template <> struct MappingTraits<AArch64FunctionInfo> {
YamlIO.mapOptional("stackSizeZPR", MFI.StackSizeZPR);
YamlIO.mapOptional("stackSizePPR", MFI.StackSizePPR);
YamlIO.mapOptional("hasStackFrame", MFI.HasStackFrame);
+ YamlIO.mapOptional("hasStreamingModeChanges", MFI.HasStreamingModeChanges);
}
};
diff --git a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
index cdf2822..a90950d 100644
--- a/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PostCoalescerPass.cpp
@@ -75,6 +75,10 @@ bool AArch64PostCoalescer::runOnMachineFunction(MachineFunction &MF) {
if (Src != Dst)
MRI->replaceRegWith(Dst, Src);
+ if (MI.getOperand(1).isUndef())
+ for (MachineOperand &MO : MRI->use_operands(Dst))
+ MO.setIsUndef();
+
// MI must be erased from the basic block before recalculating the live
// interval.
LIS->RemoveMachineInstrFromMaps(MI);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index bc6b931..98a128e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -265,7 +265,7 @@ def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
]>;
-def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>;
+def AArch64asrd_m1 : SDNode<"AArch64ISD::ASRD_MERGE_OP1", SDT_AArch64Arith_Imm>;
def AArch64urshri_p_node : SDNode<"AArch64ISD::URSHR_I_PRED", SDT_AArch64Arith_Imm>;
def AArch64urshri_p : PatFrags<(ops node:$op1, node:$op2, node:$op3),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 9e2d698..05a4313 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1855,6 +1855,8 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return LowerTriOp(AArch64::G_UDOT);
case Intrinsic::aarch64_neon_sdot:
return LowerTriOp(AArch64::G_SDOT);
+ case Intrinsic::aarch64_neon_usdot:
+ return LowerTriOp(AArch64::G_USDOT);
case Intrinsic::aarch64_neon_sqxtn:
return LowerUnaryOp(TargetOpcode::G_TRUNC_SSAT_S);
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index 4749748..434ea67 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI,
LiveRegs PhysLiveRegs);
+ /// Attempts to find an insertion point before \p Inst where the status flags
+ /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
+ /// the block is found.
+ std::pair<MachineBasicBlock::iterator, LiveRegs>
+ findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
+ SmallVectorImpl<InstInfo>::const_iterator Inst);
void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, ZAState From,
ZAState To, LiveRegs PhysLiveRegs);
@@ -337,6 +343,28 @@ private:
MachineRegisterInfo *MRI = nullptr;
};
+static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
+ LiveRegs PhysLiveRegs = LiveRegs::None;
+ if (!LiveUnits.available(AArch64::NZCV))
+ PhysLiveRegs |= LiveRegs::NZCV;
+ // We have to track W0 and X0 separately as otherwise things can get
+ // confused if we attempt to preserve X0 but only W0 was defined.
+ if (!LiveUnits.available(AArch64::W0))
+ PhysLiveRegs |= LiveRegs::W0;
+ if (!LiveUnits.available(AArch64::W0_HI))
+ PhysLiveRegs |= LiveRegs::W0_HI;
+ return PhysLiveRegs;
+}
+
+static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
+ if (PhysLiveRegs & LiveRegs::NZCV)
+ LiveUnits.addReg(AArch64::NZCV);
+ if (PhysLiveRegs & LiveRegs::W0)
+ LiveUnits.addReg(AArch64::W0);
+ if (PhysLiveRegs & LiveRegs::W0_HI)
+ LiveUnits.addReg(AArch64::W0_HI);
+}
+
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
SMEFnAttrs.hasZAState()) &&
@@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
LiveRegUnits LiveUnits(*TRI);
LiveUnits.addLiveOuts(MBB);
- auto GetPhysLiveRegs = [&] {
- LiveRegs PhysLiveRegs = LiveRegs::None;
- if (!LiveUnits.available(AArch64::NZCV))
- PhysLiveRegs |= LiveRegs::NZCV;
- // We have to track W0 and X0 separately as otherwise things can get
- // confused if we attempt to preserve X0 but only W0 was defined.
- if (!LiveUnits.available(AArch64::W0))
- PhysLiveRegs |= LiveRegs::W0;
- if (!LiveUnits.available(AArch64::W0_HI))
- PhysLiveRegs |= LiveRegs::W0_HI;
- return PhysLiveRegs;
- };
-
- Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
+ Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
for (MachineInstr &MI : reverse(MBB)) {
MachineBasicBlock::iterator MBBI(MI);
LiveUnits.stepBackward(MI);
- LiveRegs PhysLiveRegs = GetPhysLiveRegs();
+ LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
// The SMEStateAllocPseudo marker is added to a function if the save
// buffer was allocated in SelectionDAG. It marks the end of the
// allocation -- which is a safe point for this pass to insert any TPIDR2
@@ -476,6 +491,49 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
return BundleStates;
}
+std::pair<MachineBasicBlock::iterator, LiveRegs>
+MachineSMEABI::findStateChangeInsertionPoint(
+ MachineBasicBlock &MBB, const BlockInfo &Block,
+ SmallVectorImpl<InstInfo>::const_iterator Inst) {
+ LiveRegs PhysLiveRegs;
+ MachineBasicBlock::iterator InsertPt;
+ if (Inst != Block.Insts.end()) {
+ InsertPt = Inst->InsertPt;
+ PhysLiveRegs = Inst->PhysLiveRegs;
+ } else {
+ InsertPt = MBB.getFirstTerminator();
+ PhysLiveRegs = Block.PhysLiveRegsAtExit;
+ }
+
+ if (!(PhysLiveRegs & LiveRegs::NZCV))
+ return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).
+
+ // Find the previous state change. We can not move before this point.
+ MachineBasicBlock::iterator PrevStateChangeI;
+ if (Inst == Block.Insts.begin()) {
+ PrevStateChangeI = MBB.begin();
+ } else {
+ // Note: `std::prev(Inst)` is the previous InstInfo. We only create an
+ // InstInfo object for instructions that require a specific ZA state, so the
+ // InstInfo is the site of the previous state change in the block (which can
+ // be several MIs earlier).
+ PrevStateChangeI = std::prev(Inst)->InsertPt;
+ }
+
+ // Note: LiveUnits will only accurately track X0 and NZCV.
+ LiveRegUnits LiveUnits(*TRI);
+ setPhysLiveRegs(LiveUnits, PhysLiveRegs);
+ for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
+ // Don't move before/into a call (which may have a state change before it).
+ if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
+ break;
+ LiveUnits.stepBackward(*I);
+ if (LiveUnits.available(AArch64::NZCV))
+ return {I, getPhysLiveRegs(LiveUnits)};
+ }
+ return {InsertPt, PhysLiveRegs};
+}
+
void MachineSMEABI::insertStateChanges(EmitContext &Context,
const FunctionInfo &FnInfo,
const EdgeBundles &Bundles,
@@ -490,10 +548,13 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
CurrentState = InState;
for (auto &Inst : Block.Insts) {
- if (CurrentState != Inst.NeededState)
- emitStateChange(Context, MBB, Inst.InsertPt, CurrentState,
- Inst.NeededState, Inst.PhysLiveRegs);
- CurrentState = Inst.NeededState;
+ if (CurrentState != Inst.NeededState) {
+ auto [InsertPt, PhysLiveRegs] =
+ findStateChangeInsertionPoint(MBB, Block, &Inst);
+ emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
+ PhysLiveRegs);
+ CurrentState = Inst.NeededState;
+ }
}
if (MBB.succ_empty())
@@ -501,9 +562,12 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
ZAState OutState =
BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
- if (CurrentState != OutState)
- emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState,
- OutState, Block.PhysLiveRegsAtExit);
+ if (CurrentState != OutState) {
+ auto [InsertPt, PhysLiveRegs] =
+ findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
+ emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
+ PhysLiveRegs);
+ }
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index c684f9e..01a40c1 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -654,7 +654,6 @@ void SIPreEmitPeephole::collectUnpackingCandidates(
if (TotalCyclesBetweenCandidates < NumMFMACycles - 1)
InstrsToUnpack.insert(&Instr);
}
- return;
}
void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
@@ -681,7 +680,6 @@ void SIPreEmitPeephole::performF32Unpacking(MachineInstr &I) {
HiDstOp.setIsRenamable(DstOp.isRenamable());
I.eraseFromParent();
- return;
}
MachineInstrBuilder SIPreEmitPeephole::createUnpackedMI(MachineInstr &I,
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9945ecc..0d7b6d1 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -161,8 +161,8 @@ namespace {
friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
return TE.PseudoOpc < PseudoOpc;
}
- friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
- const NEONLdStTableEntry &TE) {
+ [[maybe_unused]] friend bool operator<(unsigned PseudoOpc,
+ const NEONLdStTableEntry &TE) {
return PseudoOpc < TE.PseudoOpc;
}
};
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 82c43ff..26a8728 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -1165,12 +1165,15 @@ void DXILBitcodeWriter::writeValueSymbolTableForwardDecl() {}
/// Returns the bit offset to backpatch with the location of the real VST.
void DXILBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
- if (!M.getTargetTriple().empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE,
- M.getTargetTriple().str(), 0 /*TODO*/);
- const std::string &DL = M.getDataLayoutStr();
- if (!DL.empty())
- writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
+ // We need to hardcode a triple and datalayout that's compatible with the
+ // historical DXIL triple and datalayout from DXC.
+ StringRef Triple = "dxil-ms-dx";
+ StringRef DL = "e-m:e-p:32:32-i1:8-i8:8-i16:32-i32:32-i64:64-"
+ "f16:32-f32:32-f64:64-n8:16:32:64";
+ writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, Triple, 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+
if (!M.getModuleInlineAsm().empty())
writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
index 1eb03bf..725f2b1 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILWriterPass.cpp
@@ -149,11 +149,6 @@ public:
std::string Data;
llvm::raw_string_ostream OS(Data);
- Triple OriginalTriple = M.getTargetTriple();
- // Set to DXIL triple when write to bitcode.
- // Only the output bitcode need to be DXIL triple.
- M.setTargetTriple(Triple("dxil-ms-dx"));
-
// Perform late legalization of lifetime intrinsics that would otherwise
// fail the Module Verifier if performed in an earlier pass
legalizeLifetimeIntrinsics(M);
@@ -165,9 +160,6 @@ public:
// not-so-legal legalizations
removeLifetimeIntrinsics(M);
- // Recover triple.
- M.setTargetTriple(OriginalTriple);
-
Constant *ModuleConstant =
ConstantDataArray::get(M.getContext(), arrayRefFromStringRef(Data));
auto *GV = new llvm::GlobalVariable(M, ModuleConstant->getType(), true,
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 52e6b0b..68f5312 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -174,8 +174,8 @@ namespace {
const TargetRegisterInfo *TRI;
};
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
- LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const PrintRegSet &P);
raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
OS << '{';
for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 14b6bb3..9087f9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -272,15 +272,14 @@ namespace {
OS << *I << ' ' << **I << '\n';
}
- raw_ostream &operator<< (raw_ostream &OS,
- const NodeVect &S) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const NodeVect &S);
raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) {
dump_node_container(OS, S);
return OS;
}
- raw_ostream &operator<< (raw_ostream &OS,
- const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const NodeToUsesMap &M);
raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
for (const auto &I : M) {
const UseSet &Us = I.second;
@@ -914,9 +913,8 @@ namespace {
const NodeToValueMap &Map;
};
- raw_ostream &operator<< (raw_ostream &OS,
- const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ;
- raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) {
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const LocationAsBlock &Loc) {
for (const auto &I : Loc.Map) {
OS << I.first << " -> ";
if (BasicBlock *B = cast_or_null<BasicBlock>(I.second))
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 14a7ae7..3900aac 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -132,8 +132,7 @@ namespace {
const TargetRegisterInfo &TRI;
friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
};
- raw_ostream &operator<<(raw_ostream &OS,
- const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+ [[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P);
raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
<< ", PredR:" << printReg(P.FP.PredR, &P.TRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index f9fdab4..9c81e96 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -51,11 +51,11 @@ private:
const TargetRegisterInfo &TRI;
};
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR)
- LLVM_ATTRIBUTE_UNUSED;
- raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) {
- return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
- }
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const PrintRegister &PR);
+raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
+ return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
+}
class HexagonGenPredicate : public MachineFunctionPass {
public:
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index bfea50e..6b48a21 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -422,12 +422,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) {
return new HexagonTargetStreamer(S);
}
-static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static void clearFeature(MCSubtargetInfo *STI, uint64_t F) {
if (STI->hasFeature(F))
STI->ToggleFeature(F);
}
-static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) {
+[[maybe_unused]] static bool checkFeature(MCSubtargetInfo *STI, uint64_t F) {
return STI->hasFeature(F);
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 944a1e2..8bf0d11 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
return SDV;
}
+ // Recognize build vector patterns to emit VSX vector instructions
+ // instead of loading value from memory.
+ if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
+ return VecPat;
}
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
@@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N,
return SDValue();
}
+// LXVKQ instruction load VSX vector with a special quadword value
+// based on an immediate value. This helper method returns the details of the
+// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
+// to help generate the LXVKQ instruction and the subsequent shift instruction
+// required to match the original build vector pattern.
+
+// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
+using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
+
+static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
+
+ // LXVKQ instruction loads the Quadword value:
+ // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
+ static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
+ static const uint32_t Uim = 16;
+
+ // Check for direct LXVKQ match (no shift needed)
+ if (FullVal == BasePattern)
+ return std::make_tuple(Uim, uint8_t{0});
+
+ // Check if FullValue is 1 (the result of the base pattern >> 127)
+ if (FullVal == APInt(128, 1))
+ return std::make_tuple(Uim, uint8_t{127});
+
+ return std::nullopt;
+}
+
+/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
+/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
+/// LXVKQ instruction load VSX vector with a special quadword value based on an
+/// immediate value. if UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
+/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
+/// This can be used to inline the build vector constants that have the
+/// following patterns:
+///
+/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
+/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
+/// MSB pattern can directly loaded using LXVKQ while LSB is loaded using a
+/// combination of splatting and right shift instructions.
+
+SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
+ "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
+
+ // This transformation is only supported if we are loading either a byte,
+ // halfword, word, or doubleword.
+ EVT VT = Op.getValueType();
+ if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
+ VT == MVT::v2i64))
+ return SDValue();
+
+ LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
+ << VT.getEVTString() << "): ";
+ Op->dump());
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ElemBits = VT.getScalarSizeInBits();
+
+ bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
+
+ // Check for Non-constant operand in the build vector.
+ for (const SDValue &Operand : Op.getNode()->op_values()) {
+ if (!isa<ConstantSDNode>(Operand))
+ return SDValue();
+ }
+
+ // Assemble build vector operands as a 128-bit register value
+ // We need to reconstruct what the 128-bit register pattern would be
+ // that produces this vector when interpreted with the current endianness
+ APInt FullVal = APInt::getZero(128);
+
+ for (unsigned Index = 0; Index < NumElems; ++Index) {
+ auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
+
+ // Get element value as raw bits (zero-extended)
+ uint64_t ElemValue = C->getZExtValue();
+
+ // Mask to element size to ensure we only get the relevant bits
+ if (ElemBits < 64)
+ ElemValue &= ((1ULL << ElemBits) - 1);
+
+ // Calculate bit position for this element in the 128-bit register
+ unsigned BitPos =
+ (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
+
+ // Create APInt for the element value and shift it to correct position
+ APInt ElemAPInt(128, ElemValue);
+ ElemAPInt <<= BitPos;
+
+ // Place the element value at the correct bit position
+ FullVal |= ElemAPInt;
+ }
+
+ if (FullVal.isZero() || FullVal.isAllOnes())
+ return SDValue();
+
+ if (auto UIMOpt = getPatternInfo(FullVal)) {
+ const auto &[Uim, ShiftAmount] = *UIMOpt;
+ SDLoc Dl(Op);
+
+ // Generate LXVKQ instruction if the shift amount is zero.
+ if (ShiftAmount == 0) {
+ SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
+ SDValue LxvkqInstr =
+ SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "combineBVLoadsSpecialValue: Instruction Emitted ";
+ LxvkqInstr.dump());
+ return LxvkqInstr;
+ }
+
+ assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
+
+ // The right shifted pattern can be constructed using a combination of
+ // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
+ // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
+ // value 255.
+ SDValue ShiftAmountVec =
+ SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
+ DAG.getTargetConstant(255, Dl, MVT::i32)),
+ 0);
+ // Generate appropriate right shift instruction
+ SDValue ShiftVec = SDValue(
+ DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
+ 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
+ ShiftVec.dump());
+ return ShiftVec;
+ }
+ // No patterns matched for build vectors.
+ return SDValue();
+}
+
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 59f3387..880aca7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1472,6 +1472,9 @@ namespace llvm {
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
+ SDValue combineBVLoadsSpecialValue(SDValue Operand,
+ SelectionDAG &DAG) const;
+
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
/// essentially any shuffle of v8i16 vectors that just inserts one element
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 2384959..2d8c633 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> {
126>;
}
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOR(B,C)
+// and emit the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, AND(B,C), XOR(B,C), NOT(C),
+// NOT(B), NAND(B,C)}
+// - C is the "false" case op NOR(B,C)
+// =============================================================================
+multiclass XXEvalTernarySelectNor<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOR(B,C)) XXEVAL immediate value: 129
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 129>;
+
+ // Pattern: (A ? B : NOR(B,C)) XXEVAL immediate value: 131
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNor Vt:$vB, Vt:$vC)),131>;
+
+ // Pattern: (A ? C : NOR(B,C)) XXEVAL immediate value: 133
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vC, (VNor Vt:$vB, Vt:$vC)),
+ 133>;
+
+ // Pattern: (A ? XOR(B,C) : NOR(B,C)) XXEVAL immediate value: 134
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 134>;
+
+ // Pattern: (A ? NOT(C) : NOR(B,C)) XXEVAL immediate value: 138
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 138>;
+
+ // Pattern: (A ? NOT(B) : NOR(B,C)) XXEVAL immediate value: 140
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNor Vt:$vB, Vt:$vC)),
+ 140>;
+
+ // Pattern: (A ? NAND(B,C) : NOR(B,C)) XXEVAL immediate value: 142
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 142>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : EQV(B,C)
+// and emit the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NAND(B,C), NOT(B),
+// NOT(C)}
+// - C is the "false" case op EQV(B,C)
+// =============================================================================
+multiclass XXEvalTernarySelectEqv<ValueType Vt>{
+ // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 151>;
+
+ // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 152>;
+
+ // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 154>;
+
+ // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 158>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(C)
+// and emit the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C),
+// B, NOT(B)}
+// - C is the "false" case op NOT(C)
+// =============================================================================
+multiclass XXEvalTernarySelectNotC<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>;
+
+ // Pattern: (A ? B : NOT(C)) XXEVAL immediate value: 163
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>;
+
+ // Pattern: (A ? XOR(B,C) : NOT(C)) XXEVAL immediate value: 166
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>;
+
+ // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>;
+
+ // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>;
+
+ // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(B)
+// and emit the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C),
+// C, NOT(B)}
+// - C is the "false" case op NOT(B)
+// =============================================================================
+multiclass XXEvalTernarySelectNotB<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>;
+
+ // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>;
+
+ // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>;
+
+ // Pattern: (A ? OR(B,C) : NOT(B)) XXEVAL immediate value: 199
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>;
+
+ // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>;
+
+ // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NAND(B,C)
+// and emit the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)}
+// - C is the "false" case op NAND(B,C)
+// =============================================================================
+multiclass XXEvalTernarySelectNand<ValueType Vt>{
+ // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>;
+
+ // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>;
+
+ // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 230>;
+
+ // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 231>;
+
+ // Pattern: (A ? EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 233>;
+}
+
let Predicates = [PrefixInstrs, HasP10Vector] in {
let AddedComplexity = 400 in {
def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
@@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
defm : XXEvalTernarySelectC<Ty>;
defm : XXEvalTernarySelectXor<Ty>;
defm : XXEvalTernarySelectOr<Ty>;
+ defm : XXEvalTernarySelectNor<Ty>;
+ defm : XXEvalTernarySelectEqv<Ty>;
+ defm : XXEvalTernarySelectNotC<Ty>;
+ defm : XXEvalTernarySelectNotB<Ty>;
+ defm : XXEvalTernarySelectNand<Ty>;
}
// Anonymous patterns to select prefixed VSX loads and stores.
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 8851a0f..e857b2d 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -3356,10 +3356,10 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
bool isValidInsnFormat(StringRef Format, const MCSubtargetInfo &STI) {
return StringSwitch<bool>(Format)
- .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true)
- .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj",
+ .Cases({"r", "r4", "i", "b", "sb", "u", "j", "uj", "s"}, true)
+ .Cases({"cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"},
STI.hasFeature(RISCV::FeatureStdExtZca))
- .Cases("qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es",
+ .Cases({"qc.eai", "qc.ei", "qc.eb", "qc.ej", "qc.es"},
!STI.hasFeature(RISCV::Feature64Bit))
.Default(false);
}
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 50730c6..ab93bba 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -43,7 +43,7 @@ const llvm::StringRef RISCVLMULInstrument::DESC_NAME = "RISCV-LMUL";
bool RISCVLMULInstrument::isDataValid(llvm::StringRef Data) {
// Return true if not one of the valid LMUL strings
return StringSwitch<bool>(Data)
- .Cases("M1", "M2", "M4", "M8", "MF2", "MF4", "MF8", true)
+ .Cases({"M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"}, true)
.Default(false);
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index f863392a..637d61fe 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -270,7 +270,7 @@ class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
// and floating point computation.
// The V pipeline is modeled by the VCQ, VA, VL, and VS resources. There can
// be one or two VA (Vector Arithmetic).
-multiclass SiFive7ProcResources<bit extraVALU = false> {
+multiclass SiFive7ProcResources<bit dualVALU = false> {
let BufferSize = 0 in {
def PipeA : ProcResource<1>;
def PipeB : ProcResource<1>;
@@ -279,7 +279,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
def FDiv : ProcResource<1>; // FP Division/Sqrt
// Arithmetic sequencer(s)
- if extraVALU then {
+ if dualVALU then {
// VA1 can handle any vector airthmetic instruction.
def VA1 : ProcResource<1>;
// VA2 generally can only handle simple vector arithmetic.
@@ -305,7 +305,7 @@ multiclass SiFive7ProcResources<bit extraVALU = false> {
def PipeAB : ProcResGroup<[!cast<ProcResource>(NAME#"PipeA"),
!cast<ProcResource>(NAME#"PipeB")]>;
- if extraVALU then
+ if dualVALU then
def VA1OrVA2 : ProcResGroup<[!cast<ProcResource>(NAME#"VA1"),
!cast<ProcResource>(NAME#"VA2")]>;
}
@@ -1550,10 +1550,10 @@ multiclass SiFive7ReadAdvance {
/// This multiclass is a "bundle" of (1) processor resources (i.e. pipes) and
/// (2) WriteRes entries. It's parameterized by config values that will
/// eventually be supplied by different SchedMachineModels.
-multiclass SiFive7SchedResources<int vlen, bit extraVALU,
+multiclass SiFive7SchedResources<int vlen, bit dualVALU,
SiFive7FPLatencies fpLatencies,
bit hasFastGather> {
- defm SiFive7 : SiFive7ProcResources<extraVALU>;
+ defm SiFive7 : SiFive7ProcResources<dualVALU>;
// Pull out defs from SiFive7ProcResources so we can refer to them by name.
defvar SiFive7PipeA = !cast<ProcResource>(NAME # SiFive7PipeA);
@@ -1562,10 +1562,10 @@ multiclass SiFive7SchedResources<int vlen, bit extraVALU,
defvar SiFive7IDiv = !cast<ProcResource>(NAME # SiFive7IDiv);
defvar SiFive7FDiv = !cast<ProcResource>(NAME # SiFive7FDiv);
// Pass SiFive7VA for VA1 and VA1OrVA2 if there is only 1 VALU.
- defvar SiFive7VA1 = !if (extraVALU,
+ defvar SiFive7VA1 = !if (dualVALU,
!cast<ProcResource>(NAME # SiFive7VA1),
!cast<ProcResource>(NAME # SiFive7VA));
- defvar SiFive7VA1OrVA2 = !if (extraVALU,
+ defvar SiFive7VA1OrVA2 = !if (dualVALU,
!cast<ProcResGroup>(NAME # SiFive7VA1OrVA2),
!cast<ProcResource>(NAME # SiFive7VA));
defvar SiFive7VA = !cast<ProcResource>(NAME # SiFive7VA);
@@ -1608,7 +1608,7 @@ class SiFive7SchedMachineModel<int vlen> : SchedMachineModel {
HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
HasStdExtZkr];
int VLEN = vlen;
- bit HasExtraVALU = false;
+ bit HasDualVALU = false;
SiFive7FPLatencies FPLatencies;
bit HasFastGather = false;
@@ -1635,7 +1635,7 @@ def SiFive7VLEN512Model : SiFive7SchedMachineModel<512> {
}
def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
- let HasExtraVALU = true;
+ let HasDualVALU = true;
let FPLatencies = SiFive7LowFPLatencies;
let HasFastGather = true;
}
@@ -1643,7 +1643,7 @@ def SiFive7VLEN1024X300Model : SiFive7SchedMachineModel<1024> {
/// Binding models to their scheduling resources.
foreach model = [SiFive7VLEN512Model, SiFive7VLEN1024X300Model] in {
let SchedModel = model in
- defm model.Name : SiFive7SchedResources<model.VLEN, model.HasExtraVALU,
+ defm model.Name : SiFive7SchedResources<model.VLEN, model.HasDualVALU,
model.FPLatencies,
model.HasFastGather>;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index a466ab2..a0cff4d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3765,7 +3765,6 @@ void SPIRVInstructionSelector::decorateUsesAsNonUniform(
SPIRV::Decoration::NonUniformEXT, {});
}
}
- return;
}
bool SPIRVInstructionSelector::extractSubvector(
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 2934c88..fa08d44 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -246,8 +246,7 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
}
}
-static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
-{
+[[maybe_unused]] static bool verifyLeafProcRegUse(MachineRegisterInfo *MRI) {
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
if (MRI->isPhysRegUsed(reg))
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
index d9c8e22..6e99fc3 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
@@ -23,7 +23,7 @@ std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) {
.Case("i64", wasm::ValType::I64)
.Case("f32", wasm::ValType::F32)
.Case("f64", wasm::ValType::F64)
- .Cases("v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2",
+ .Cases({"v128", "i8x16", "i16x8", "i32x4", "i64x2", "f32x4", "f64x2"},
wasm::ValType::V128)
.Case("funcref", wasm::ValType::FUNCREF)
.Case("externref", wasm::ValType::EXTERNREF)
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a8908d4..ac251fd 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3514,15 +3514,16 @@ bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
// xacquire <insn> ; xacquire must be accompanied by 'lock'
bool IsPrefix =
StringSwitch<bool>(Name)
- .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
- .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
- .Cases("xacquire", "xrelease", true)
- .Cases("acquire", "release", isParsingIntelSyntax())
+ .Cases({"cs", "ds", "es", "fs", "gs", "ss"}, true)
+ .Cases({"rex64", "data32", "data16", "addr32", "addr16"}, true)
+ .Cases({"xacquire", "xrelease"}, true)
+ .Cases({"acquire", "release"}, isParsingIntelSyntax())
.Default(false);
auto isLockRepeatNtPrefix = [](StringRef N) {
return StringSwitch<bool>(N)
- .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
+ .Cases({"lock", "rep", "repe", "repz", "repne", "repnz", "notrack"},
+ true)
.Default(false);
};
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 2bf016a..6db780f 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1338,7 +1338,6 @@ def ProcessorFeatures {
list<SubtargetFeature> PTLFeatures =
!listremove(ARLSFeatures, [FeatureWIDEKL]);
-
// Clearwaterforest
list<SubtargetFeature> CWFAdditionalFeatures = [FeaturePREFETCHI,
FeatureAVXVNNIINT16,
@@ -1880,8 +1879,10 @@ def : ProcModel<P, AlderlakePModel,
}
def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures,
ProcessorFeatures.ADLTuning>;
-def : ProcModel<"pantherlake", AlderlakePModel,
+foreach P = ["pantherlake", "wildcatlake"] in {
+def : ProcModel<P, AlderlakePModel,
ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
+}
def : ProcModel<"clearwaterforest", AlderlakePModel,
ProcessorFeatures.CWFFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"emeraldrapids", SapphireRapidsModel,
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index e0991aa..9f88fda 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -602,8 +602,7 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned V,
- const TableEntry &TE) {
+ [[maybe_unused]] friend bool operator<(unsigned V, const TableEntry &TE) {
return V < TE.from;
}
};
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c32b1a6..a0b64ff 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58342,11 +58342,12 @@ static SDValue combineX86CloadCstore(SDNode *N, SelectionDAG &DAG) {
} else if (Op1.getOpcode() == ISD::AND && Sub.getValue(0).use_empty()) {
SDValue Src = Op1;
SDValue Op10 = Op1.getOperand(0);
- if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1))) {
- // res, flags2 = sub 0, (and (xor X, -1), Y)
+ if (Op10.getOpcode() == ISD::XOR && isAllOnesConstant(Op10.getOperand(1)) &&
+ llvm::isOneConstant(Op1.getOperand(1))) {
+ // res, flags2 = sub 0, (and (xor X, -1), 1)
// cload/cstore ..., cond_ne, flag2
// ->
- // res, flags2 = sub 0, (and X, Y)
+ // res, flags2 = sub 0, (and X, 1)
// cload/cstore ..., cond_e, flag2
Src = DAG.getNode(ISD::AND, DL, Op1.getValueType(), Op10.getOperand(0),
Op1.getOperand(1));
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 0fd44b7..ec31675 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1256,8 +1256,17 @@ def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
(MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
(MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
+
+// If the globaladdr is an absolute_symbol, don't bother using the sign extending
+// instruction since there's no benefit to using it with absolute symbols.
+def globalAddrNoAbsSym : PatLeaf<(tglobaladdr:$dst), [{
+ auto *GA = cast<GlobalAddressSDNode>(N);
+ return !GA->getGlobal()->getAbsoluteSymbolRange();
+}]>;
+def : Pat<(i64 (X86Wrapper globalAddrNoAbsSym:$dst)),
+ (MOV64ri32 tglobaladdr:$dst)>,
+ Requires<[KernelCode]>;
+
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
(MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper mcsym:$dst)),