aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td10
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTagging.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp37
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp67
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h13
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h2
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp140
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h3
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp17
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp6
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp30
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp29
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp14
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h3
23 files changed, 382 insertions, 71 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 201bfe0..d6a3d59 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1236,14 +1236,20 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
.add(MI.getOperand(3));
transferImpOps(MI, I, I);
} else {
+ unsigned RegState =
+ getRenamableRegState(MI.getOperand(1).isRenamable()) |
+ getKillRegState(
+ MI.getOperand(1).isKill() &&
+ MI.getOperand(1).getReg() != MI.getOperand(2).getReg() &&
+ MI.getOperand(1).getReg() != MI.getOperand(3).getReg());
BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
: AArch64::ORRv16i8))
.addReg(DstReg,
RegState::Define |
getRenamableRegState(MI.getOperand(0).isRenamable()))
- .add(MI.getOperand(1))
- .add(MI.getOperand(1));
+ .addReg(MI.getOperand(1).getReg(), RegState)
+ .addReg(MI.getOperand(1).getReg(), RegState);
auto I2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index d068a12..b033f88 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7362,7 +7362,9 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
[(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
V128, V128, V128,
- asm#"2", ".8h", ".16b", ".16b", []>;
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
+ (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm)))))]>;
let Predicates = [HasAES] in {
def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc,
V128, V64, V64,
@@ -7374,10 +7376,6 @@ multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
[(set (v16i8 V128:$Rd), (OpNode (extract_high_v2i64 (v2i64 V128:$Rn)),
(extract_high_v2i64 (v2i64 V128:$Rm))))]>;
}
-
- def : Pat<(v8i16 (OpNode (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn))),
- (v8i8 (extract_high_v16i8 (v16i8 V128:$Rm))))),
- (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
}
multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
@@ -7402,6 +7400,7 @@ multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
(extract_high_v4i32 (v4i32 V128:$Rm))))]>;
}
+let isCommutable = 1 in
multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode = null_frag> {
def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
@@ -7483,6 +7482,7 @@ multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
(extract_high_v4i32 (v4i32 V128:$Rm)))))))]>;
}
+let isCommutable = 1 in
multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode = null_frag> {
def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ac31236..8cfbff9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6055,6 +6055,7 @@ defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
+let isCommutable = 1 in
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
TriOpFrag<(add node:$LHS, (abds node:$MHS, node:$RHS))> >;
@@ -6806,6 +6807,7 @@ defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>
defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
+let isCommutable = 1 in
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", abds>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", abds>;
@@ -6822,6 +6824,7 @@ defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", saddsat>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", ssubsat>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
int_aarch64_neon_sqdmull>;
+let isCommutable = 0 in
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
@@ -6836,6 +6839,7 @@ defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
+let isCommutable = 0 in
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index f136a184..a67bd42 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -585,8 +585,7 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
ClMaxLifetimes);
if (StandardLifetime) {
IntrinsicInst *Start = Info.LifetimeStart[0];
- uint64_t Size =
- cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
+ uint64_t Size = *Info.AI->getAllocationSize(*DL);
Size = alignTo(Size, kTagGranuleSize);
tagAlloca(AI, Start->getNextNode(), TagPCall, Size);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index a6e4a63..40d960e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5879,8 +5879,12 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo &MRI = *B.getMRI();
- std::tie(BaseReg, ImmOffset) =
- AMDGPU::getBaseWithConstantOffset(MRI, OrigOffset);
+ // On GFX1250+, voffset and immoffset are zero-extended from 32 bits before
+ // being added, so we can only safely match a 32-bit addition with no unsigned
+ // overflow.
+ bool CheckNUW = AMDGPU::isGFX1250(ST);
+ std::tie(BaseReg, ImmOffset) = AMDGPU::getBaseWithConstantOffset(
+ MRI, OrigOffset, /*KnownBits=*/nullptr, CheckNUW);
// If BaseReg is a pointer, convert it to int.
if (MRI.getType(BaseReg).isPointer())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
index f580f43..c21a9a1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteAGPRCopyMFMA.cpp
@@ -109,12 +109,17 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
// Find AV_* registers assigned to AGPRs.
const TargetRegisterClass *VirtRegRC = MRI.getRegClass(VReg);
- if (!TRI.isVectorSuperClass(VirtRegRC))
+ if (!TRI.hasAGPRs(VirtRegRC))
continue;
- const TargetRegisterClass *AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
- if (!TRI.isAGPRClass(AssignedRC))
- continue;
+ const TargetRegisterClass *AssignedRC = VirtRegRC;
+ if (TRI.hasVGPRs(VirtRegRC)) {
+ // If this is an AV register, we have to check if the actual assignment is
+ // to an AGPR
+ AssignedRC = TRI.getPhysRegBaseClass(PhysReg);
+ if (!TRI.isAGPRClass(AssignedRC))
+ continue;
+ }
LiveInterval &LI = LIS.getInterval(VReg);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ff8efd2..0d2feeb 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4933,6 +4933,43 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
return false;
}
+ // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
+ // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
+ // the first SGPR and use it for both the low and high operations.
+ if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+
+ const MCOperand &Src0 = Inst.getOperand(Src0Idx);
+ const MCOperand &Src1 = Inst.getOperand(Src1Idx);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+ unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+
+ auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
+ unsigned Mask = 1U << Index;
+ return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
+ };
+
+ if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
+ !VerifyOneSGPR(/*Index=*/0))
+ return false;
+ if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
+ !VerifyOneSGPR(/*Index=*/1))
+ return false;
+
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx != -1) {
+ const MCOperand &Src2 = Inst.getOperand(Src2Idx);
+ if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
+ !VerifyOneSGPR(/*Index=*/2))
+ return false;
+ }
+ }
+
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8f44c03..5b327fb 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6106,6 +6106,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
case MVT::f64:
return true;
case MVT::f16:
+ case MVT::bf16:
return Subtarget->has16BitInsts() && !denormalModeIsFlushAllF64F16(MF);
default:
break;
@@ -10877,6 +10878,13 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
}
+// Return whether the operation has NoUnsignedWrap property.
+static bool isNoUnsignedWrap(SDValue Addr) {
+ return (Addr.getOpcode() == ISD::ADD &&
+ Addr->getFlags().hasNoUnsignedWrap()) ||
+ Addr->getOpcode() == ISD::OR;
+}
+
bool SITargetLowering::shouldPreservePtrArith(const Function &F,
EVT PtrVT) const {
return UseSelectionDAGPTRADD && PtrVT == MVT::i64;
@@ -10898,8 +10906,14 @@ SITargetLowering::splitBufferOffsets(SDValue Offset, SelectionDAG &DAG) const {
if ((C1 = dyn_cast<ConstantSDNode>(N0)))
N0 = SDValue();
else if (DAG.isBaseWithConstantOffset(N0)) {
- C1 = cast<ConstantSDNode>(N0.getOperand(1));
- N0 = N0.getOperand(0);
+ // On GFX1250+, voffset and immoffset are zero-extended from 32 bits before
+ // being added, so we can only safely match a 32-bit addition with no
+ // unsigned overflow.
+ bool CheckNUW = AMDGPU::isGFX1250(*Subtarget);
+ if (!CheckNUW || isNoUnsignedWrap(N0)) {
+ C1 = cast<ConstantSDNode>(N0.getOperand(1));
+ N0 = N0.getOperand(0);
+ }
}
if (C1) {
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index d8fe850..0a68512 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -51,7 +51,7 @@ static cl::opt<unsigned>
namespace {
enum HardClauseType {
- // For GFX10:
+ // For GFX10 and GFX1250:
// Texture, buffer, global or scratch memory instructions.
HARDCLAUSE_VMEM,
@@ -102,7 +102,8 @@ public:
HardClauseType getHardClauseType(const MachineInstr &MI) {
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
- if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
+ if (ST->getGeneration() == AMDGPUSubtarget::GFX10 ||
+ ST->hasGFX1250Insts()) {
if ((SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isFLAT(MI)) ||
SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if (ST->hasNSAClauseBug()) {
@@ -115,7 +116,6 @@ public:
if (SIInstrInfo::isFLAT(MI))
return HARDCLAUSE_FLAT;
} else {
- assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
if (SIInstrInfo::isMIMG(MI)) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f20b22d..19e6bcf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -18,6 +18,7 @@
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -5534,6 +5535,15 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
+ // information.
+ if (AMDGPU::isPackedFP32Inst(Opcode) && AMDGPU::isGFX12Plus(ST)) {
+ for (unsigned I = 0; I < 3; ++I) {
+ if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I))
+ return false;
+ }
+ }
+
return true;
}
@@ -6005,6 +6015,21 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx];
unsigned Opc = MI.getOpcode();
+ // See SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more
+ // information.
+ if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
+ MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
+ constexpr const AMDGPU::OpName OpNames[] = {
+ AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
+
+ for (auto [I, OpName] : enumerate(OpNames)) {
+ int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
+ if (static_cast<unsigned>(SrcIdx) == OpIdx &&
+ !isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, I, &MO))
+ return false;
+ }
+ }
+
if (!isLegalRegOperand(MRI, OpInfo, MO))
return false;
@@ -6053,6 +6078,39 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
return true;
}
+bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
+ const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
+ const MachineOperand *MO) const {
+ constexpr const unsigned NumOps = 3;
+ constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
+ AMDGPU::OpName::src0, AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
+
+ assert(SrcN < NumOps);
+
+ if (!MO) {
+ int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
+ if (SrcIdx == -1)
+ return true;
+ MO = &MI.getOperand(SrcIdx);
+ }
+
+ if (!MO->isReg() || !RI.isSGPRReg(MRI, MO->getReg()))
+ return true;
+
+ int ModsIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
+ if (ModsIdx == -1)
+ return true;
+
+ unsigned Mods = MI.getOperand(ModsIdx).getImm();
+ bool OpSel = Mods & SISrcMods::OP_SEL_0;
+ bool OpSelHi = Mods & SISrcMods::OP_SEL_1;
+
+ return !OpSel && !OpSelHi;
+}
+
bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand *MO) const {
const MachineFunction &MF = *MI.getParent()->getParent();
@@ -6390,6 +6448,15 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
!RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
legalizeOpWithMove(MI, VOP3Idx[2]);
+
+ // Fix the register class of packed FP32 instructions on gfx12+. See
+ // SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand for more information.
+ if (AMDGPU::isPackedFP32Inst(Opc) && AMDGPU::isGFX12Plus(ST)) {
+ for (unsigned I = 0; I < 3; ++I) {
+ if (!isLegalGFX12PlusPackedMathFP32Operand(MRI, MI, /*SrcN=*/I))
+ legalizeOpWithMove(MI, VOP3Idx[I]);
+ }
+ }
}
Register SIInstrInfo::readlaneVGPRToSGPR(
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e042b59..6b9403f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1287,6 +1287,19 @@ public:
const MachineOperand &MO) const;
bool isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand &MO) const;
+
+ /// Check if \p MO would be a legal operand for gfx12+ packed math FP32
+ /// instructions. Packed math FP32 instructions typically accept SGPRs or
+ /// VGPRs as source operands. On gfx12+, if a source operand uses SGPRs, the
+ /// HW can only read the first SGPR and use it for both the low and high
+ /// operations.
+ /// \p SrcN can be 0, 1, or 2, representing src0, src1, and src2,
+ /// respectively. If \p MO is nullptr, the operand corresponding to SrcN will
+ /// be used.
+ bool isLegalGFX12PlusPackedMathFP32Operand(
+ const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
+ const MachineOperand *MO = nullptr) const;
+
/// Legalize operands in \p MI by either commuting it or inserting a
/// copy of src1.
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 00dcb9b..1e3e9a2 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3318,6 +3318,20 @@ unsigned getLdsDwGranularity(const MCSubtargetInfo &ST) {
return 128;
}
+bool isPackedFP32Inst(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::V_PK_ADD_F32:
+ case AMDGPU::V_PK_ADD_F32_gfx12:
+ case AMDGPU::V_PK_MUL_F32:
+ case AMDGPU::V_PK_MUL_F32_gfx12:
+ case AMDGPU::V_PK_FMA_F32:
+ case AMDGPU::V_PK_FMA_F32_gfx12:
+ return true;
+ default:
+ return false;
+ }
+}
+
} // namespace AMDGPU
raw_ostream &operator<<(raw_ostream &OS,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 1252e35..1bcd36c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1709,6 +1709,8 @@ bool isArgPassedInSGPR(const Argument *Arg);
bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
+LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);
+
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ea99cc4..75d3cfa 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -802,6 +802,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::BSWAP, VT, Expand);
}
+ if (!Subtarget->isThumb1Only() && !Subtarget->hasV8_1MMainlineOps())
+ setOperationAction(ISD::SCMP, MVT::i32, Custom);
+
+ if (!Subtarget->hasV8_1MMainlineOps())
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -1634,6 +1640,10 @@ bool ARMTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
+bool ARMTargetLowering::shouldExpandCmpUsingSelects(EVT VT) const {
+ return !Subtarget->isThumb1Only() && VT.getSizeInBits() <= 32;
+}
+
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
@@ -10612,6 +10622,133 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
return DAG.getBitcast(MVT::i32, Res);
}
+SDValue ARMTargetLowering::LowerCMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Determine if this is signed or unsigned comparison
+ bool IsSigned = (Op.getOpcode() == ISD::SCMP);
+
+ // Special case for Thumb1 UCMP only
+ if (!IsSigned && Subtarget->isThumb1Only()) {
+ // For Thumb unsigned comparison, use this sequence:
+ // subs r2, r0, r1 ; r2 = LHS - RHS, sets flags
+ // sbc r2, r2 ; r2 = r2 - r2 - !carry
+ // cmp r1, r0 ; compare RHS with LHS
+ // sbc r1, r1 ; r1 = r1 - r1 - !carry
+ // subs r0, r2, r1 ; r0 = r2 - r1 (final result)
+
+ // First subtraction: LHS - RHS
+ SDValue Sub1WithFlags = DAG.getNode(
+ ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+ // SUBE: Sub1Result - Sub1Result - !carry
+ // This gives 0 if LHS >= RHS (unsigned), -1 if LHS < RHS (unsigned)
+ SDValue Sbc1 =
+ DAG.getNode(ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT),
+ Sub1Result, Sub1Result, Flags1);
+ SDValue Sbc1Result = Sbc1.getValue(0);
+
+ // Second comparison: RHS vs LHS (reverse comparison)
+ SDValue CmpFlags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, RHS, LHS);
+
+ // SUBE: RHS - RHS - !carry
+ // This gives 0 if RHS <= LHS (unsigned), -1 if RHS > LHS (unsigned)
+ SDValue Sbc2 = DAG.getNode(
+ ARMISD::SUBE, dl, DAG.getVTList(MVT::i32, FlagsVT), RHS, RHS, CmpFlags);
+ SDValue Sbc2Result = Sbc2.getValue(0);
+
+ // Final subtraction: Sbc1Result - Sbc2Result (no flags needed)
+ SDValue Result =
+ DAG.getNode(ISD::SUB, dl, MVT::i32, Sbc1Result, Sbc2Result);
+ if (Op.getValueType() != MVT::i32)
+ Result = DAG.getSExtOrTrunc(Result, dl, Op.getValueType());
+
+ return Result;
+ }
+
+ // For the ARM assembly pattern:
+ // subs r0, r0, r1 ; subtract RHS from LHS and set flags
+ // movgt r0, #1 ; if LHS > RHS, set result to 1 (GT for signed, HI for
+ // unsigned) mvnlt r0, #0 ; if LHS < RHS, set result to -1 (LT for
+ // signed, LO for unsigned)
+ // ; if LHS == RHS, result remains 0 from the subs
+
+ // Optimization: if RHS is a subtraction against 0, use ADDC instead of SUBC
+ unsigned Opcode = ARMISD::SUBC;
+
+ // Check if RHS is a subtraction against 0: (0 - X)
+ if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubLHS = RHS.getOperand(0);
+ SDValue SubRHS = RHS.getOperand(1);
+
+ // Check if it's 0 - X
+ if (isNullConstant(SubLHS)) {
+ bool CanUseAdd = false;
+ if (IsSigned) {
+ // For SCMP: only if X is known to never be INT_MIN (to avoid overflow)
+ if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
+ .getSignedMinValue()
+ .isMinSignedValue()) {
+ CanUseAdd = true;
+ }
+ } else {
+ // For UCMP: only if X is known to never be zero
+ if (DAG.isKnownNeverZero(SubRHS)) {
+ CanUseAdd = true;
+ }
+ }
+
+ if (CanUseAdd) {
+ Opcode = ARMISD::ADDC;
+ RHS = SubRHS; // Replace RHS with X, so we do LHS + X instead of
+ // LHS - (0 - X)
+ }
+ }
+ }
+
+ // Generate the operation with flags
+ SDValue OpWithFlags;
+ if (Opcode == ARMISD::ADDC) {
+ // Use ADDC: LHS + RHS (where RHS was 0 - X, now X)
+ OpWithFlags = DAG.getNode(ARMISD::ADDC, dl,
+ DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ } else {
+ // Use ARMISD::SUBC to generate SUBS instruction (subtract with flags)
+ OpWithFlags = DAG.getNode(ARMISD::SUBC, dl,
+ DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ }
+
+ SDValue OpResult = OpWithFlags.getValue(0); // The operation result
+ SDValue Flags = OpWithFlags.getValue(1); // The flags
+
+ // Constants for conditional moves
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
+ SDValue MinusOne = DAG.getAllOnesConstant(dl, MVT::i32);
+
+ // Select condition codes based on signed vs unsigned
+ ARMCC::CondCodes GTCond = IsSigned ? ARMCC::GT : ARMCC::HI;
+ ARMCC::CondCodes LTCond = IsSigned ? ARMCC::LT : ARMCC::LO;
+
+ // First conditional move: if greater than, set to 1
+ SDValue GTCondValue = DAG.getConstant(GTCond, dl, MVT::i32);
+ SDValue Result1 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, OpResult, One,
+ GTCondValue, Flags);
+
+ // Second conditional move: if less than, set to -1
+ SDValue LTCondValue = DAG.getConstant(LTCond, dl, MVT::i32);
+ SDValue Result2 = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne,
+ LTCondValue, Flags);
+
+ if (Op.getValueType() != MVT::i32)
+ Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
+
+ return Result2;
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10740,6 +10877,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_BF16:
return LowerFP_TO_BF16(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
+ case ISD::UCMP:
+ case ISD::SCMP:
+ return LowerCMP(Op, DAG);
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 825145d..a84a3cb 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -607,6 +607,8 @@ class VectorType;
bool preferZeroCompareBranch() const override { return true; }
+ bool shouldExpandCmpUsingSelects(EVT VT) const override;
+
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
bool hasAndNotCompare(SDValue V) const override {
@@ -904,6 +906,7 @@ class VectorType;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCMP(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index fda9d97..ca5d27d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -254,7 +254,8 @@ bool LoongArchAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_LARCH_ALIGN);
F.setVarFixups({Fixup});
F.setLinkerRelaxable();
- F.getParent()->setLinkerRelaxable();
+ if (!F.getParent()->isLinkerRelaxable())
+ F.getParent()->setFirstLinkerRelaxable(F.getLayoutOrder());
return true;
}
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 76dca47..f123040 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SpillsKnownBit = true;
break;
default:
+ // When spilling a CR bit, the super register may not be explicitly defined
+ // (i.e. it can be defined by a CR-logical that only defines the subreg) so
+ // we state that the CR field is undef. Also, in order to preserve the kill
+ // flag on the CR bit, we add it as an implicit use.
+
// On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
// bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
// the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
// register), and SETNBC will set this.
if (Subtarget.isISA3_1()) {
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
- .addReg(SrcReg, RegState::Undef);
+ .addReg(SrcReg, RegState::Undef)
+ .addReg(SrcReg, RegState::Implicit |
+ getKillRegState(MI.getOperand(0).isKill()));
break;
}
@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
- .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+ .addReg(SrcReg, RegState::Implicit |
+ getKillRegState(MI.getOperand(0).isKill()));
break;
}
}
// We need to move the CR field that contains the CR bit we are spilling.
- // The super register may not be explicitly defined (i.e. it can be defined
- // by a CR-logical that only defines the subreg) so we state that the CR
- // field is undef. Also, in order to preserve the kill flag on the CR bit,
- // we add it as an implicit use.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
.addReg(getCRFromCRBit(SrcReg), RegState::Undef)
.addReg(SrcReg,
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 67cc01e..e0ac591 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -674,6 +674,9 @@ static constexpr FeatureBitset XAndesGroup = {
static constexpr DecoderListEntry DecoderList32[]{
// Vendor Extensions
+ {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"},
+ {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"},
+ {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"},
{DecoderTableXVentana32,
{RISCV::FeatureVendorXVentanaCondOps},
"XVentanaCondOps"},
@@ -690,9 +693,6 @@ static constexpr DecoderListEntry DecoderList32[]{
"MIPS mips.pref"},
{DecoderTableXAndes32, XAndesGroup, "Andes extensions"},
// Standard Extensions
- {DecoderTableXCV32, XCVFeatureGroup, "CORE-V extensions"},
- {DecoderTableXqci32, XqciFeatureGroup, "Qualcomm uC Extensions"},
- {DecoderTableXRivos32, XRivosFeatureGroup, "Rivos"},
{DecoderTable32, {}, "standard 32-bit instructions"},
{DecoderTableRV32Only32, {}, "RV32-only standard 32-bit instructions"},
{DecoderTableZfinx32, {}, "Zfinx (Float in Integer)"},
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index a997ea5..8d956ce 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -32,6 +32,11 @@ static cl::opt<bool> ULEB128Reloc(
"riscv-uleb128-reloc", cl::init(true), cl::Hidden,
cl::desc("Emit R_RISCV_SET_ULEB128/E_RISCV_SUB_ULEB128 if appropriate"));
+static cl::opt<bool>
+ AlignRvc("riscv-align-rvc", cl::init(true), cl::Hidden,
+ cl::desc("When generating R_RISCV_ALIGN, insert $alignment-2 "
+ "bytes of NOPs even in norvc code"));
+
RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI,
bool Is64Bit, const MCTargetOptions &Options)
: MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
@@ -306,12 +311,21 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
// If conditions are met, compute the padding size and create a fixup encoding
// the padding size in the addend.
bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
- // Use default handling unless linker relaxation is enabled and the alignment
- // is larger than the nop size.
- const MCSubtargetInfo *STI = F.getSubtargetInfo();
- if (!STI->hasFeature(RISCV::FeatureRelax))
+ // Alignments before the first linker-relaxable instruction have fixed sizes
+ // and do not require relocations. Alignments after a linker-relaxable
+ // instruction require a relocation, even if the STI specifies norelax.
+ //
+ // firstLinkerRelaxable is the layout order within the subsection, which may
+ // be smaller than the section's order. Therefore, alignments in a
+ // lower-numbered subsection may be unnecessarily treated as linker-relaxable.
+ auto *Sec = F.getParent();
+ if (F.getLayoutOrder() <= Sec->firstLinkerRelaxable())
return false;
- unsigned MinNopLen = STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
+
+ // Use default handling unless the alignment is larger than the nop size.
+ const MCSubtargetInfo *STI = F.getSubtargetInfo();
+ unsigned MinNopLen =
+ AlignRvc || STI->hasFeature(RISCV::FeatureStdExtZca) ? 2 : 4;
if (F.getAlignment() <= MinNopLen)
return false;
@@ -321,7 +335,6 @@ bool RISCVAsmBackend::relaxAlign(MCFragment &F, unsigned &Size) {
MCFixup::create(0, Expr, FirstLiteralRelocationKind + ELF::R_RISCV_ALIGN);
F.setVarFixups({Fixup});
F.setLinkerRelaxable();
- F.getParent()->setLinkerRelaxable();
return true;
}
@@ -474,8 +487,9 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
// TODO: emit a mapping symbol right here
if (Count % 4 == 2) {
- // The canonical nop with Zca is c.nop.
- OS.write(STI->hasFeature(RISCV::FeatureStdExtZca) ? "\x01\0" : "\0\0", 2);
+ // The canonical nop with Zca is c.nop. For .balign 4, we generate a 2-byte
+ // c.nop even in a norvc region.
+ OS.write("\x01\0", 2);
Count -= 2;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 05d504c..6a1f4b3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -114,6 +114,9 @@ public:
bool enableScalableVectorization() const override {
return ST->hasVInstructions();
}
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override {
+ return ST->hasVInstructions();
+ }
TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
return ST->hasVInstructions() ? TailFoldingStyle::DataWithEVL
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 74aec4f..2b34f61 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -359,18 +359,15 @@ static void lowerExpectAssume(IntrinsicInst *II) {
}
}
-static bool toSpvOverloadedIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID,
- ArrayRef<unsigned> OpNos) {
- Function *F = nullptr;
- if (OpNos.empty()) {
- F = Intrinsic::getOrInsertDeclaration(II->getModule(), NewID);
- } else {
- SmallVector<Type *, 4> Tys;
- for (unsigned OpNo : OpNos)
- Tys.push_back(II->getOperand(OpNo)->getType());
- F = Intrinsic::getOrInsertDeclaration(II->getModule(), NewID, Tys);
- }
- II->setCalledFunction(F);
+static bool toSpvLifetimeIntrinsic(IntrinsicInst *II, Intrinsic::ID NewID) {
+ IRBuilder<> Builder(II);
+ auto *Alloca = cast<AllocaInst>(II->getArgOperand(0));
+ std::optional<TypeSize> Size =
+ Alloca->getAllocationSize(Alloca->getDataLayout());
+ Value *SizeVal = Builder.getInt64(Size ? *Size : -1);
+ Builder.CreateIntrinsic(NewID, Alloca->getType(),
+ {SizeVal, II->getArgOperand(0)});
+ II->eraseFromParent();
return true;
}
@@ -406,8 +403,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
break;
case Intrinsic::lifetime_start:
if (!STI.isShader()) {
- Changed |= toSpvOverloadedIntrinsic(
- II, Intrinsic::SPVIntrinsics::spv_lifetime_start, {1});
+ Changed |= toSpvLifetimeIntrinsic(
+ II, Intrinsic::SPVIntrinsics::spv_lifetime_start);
} else {
II->eraseFromParent();
Changed = true;
@@ -415,8 +412,8 @@ bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
break;
case Intrinsic::lifetime_end:
if (!STI.isShader()) {
- Changed |= toSpvOverloadedIntrinsic(
- II, Intrinsic::SPVIntrinsics::spv_lifetime_end, {1});
+ Changed |= toSpvLifetimeIntrinsic(
+ II, Intrinsic::SPVIntrinsics::spv_lifetime_end);
} else {
II->eraseFromParent();
Changed = true;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index f32c9bd..2611c29 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -436,20 +436,6 @@ bool SystemZTTIImpl::isLSRCostLess(
C2.ScaleCost, C2.SetupCost);
}
-bool SystemZTTIImpl::areInlineCompatible(const Function *Caller,
- const Function *Callee) const {
- const TargetMachine &TM = getTLI()->getTargetMachine();
-
- const FeatureBitset &CallerBits =
- TM.getSubtargetImpl(*Caller)->getFeatureBits();
- const FeatureBitset &CalleeBits =
- TM.getSubtargetImpl(*Callee)->getFeatureBits();
-
- // Support only equal feature bitsets. Restriction should be relaxed in the
- // future to allow inlining when callee's bits are subset of the caller's.
- return CallerBits == CalleeBits;
-}
-
unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
bool Vector = (ClassID == 1);
if (!Vector)
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index dc5736e..fc681de 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -65,9 +65,6 @@ public:
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) const override;
- bool areInlineCompatible(const Function *Caller,
- const Function *Callee) const override;
-
/// @}
/// \name Vector TTI Implementations