aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp63
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp76
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp22
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h10
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td31
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td70
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td7
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp54
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp39
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h17
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td24
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td19
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.td67
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp12
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVFeatures.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVGISel.td26
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp13
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td9
-rw-r--r--llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.cpp12
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h4
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp4
31 files changed, 331 insertions, 319 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 50a8754..479e345 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5666,18 +5666,21 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
VectorType *AccumVectorType =
VectorType::get(AccumType, VF.divideCoefficientBy(Ratio));
// We don't yet support all kinds of legalization.
- auto TA = TLI->getTypeAction(AccumVectorType->getContext(),
- EVT::getEVT(AccumVectorType));
- switch (TA) {
+ auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
+ EVT::getEVT(AccumVectorType));
+ switch (TC.first) {
default:
return Invalid;
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
case TargetLowering::TypeSplitVector:
+ // The legalised type (e.g. after splitting) must be legal too.
+ if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
+ TargetLowering::TypeLegal)
+ return Invalid;
break;
}
- // Check what kind of type-legalisation happens.
std::pair<InstructionCost, MVT> AccumLT =
getTypeLegalizationCost(AccumVectorType);
std::pair<InstructionCost, MVT> InputLT =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 6b3c151..1a697f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1448,10 +1448,10 @@ def Feature45BitNumRecordsBufferResource : SubtargetFeature< "45-bit-num-records
"The buffer resource (V#) supports 45-bit num_records"
>;
-def FeatureCluster : SubtargetFeature< "cluster",
- "HasCluster",
+def FeatureClusters : SubtargetFeature< "clusters",
+ "HasClusters",
"true",
- "Has cluster support"
+ "Has clusters of workgroups support"
>;
// Dummy feature used to disable assembler instructions.
@@ -2120,7 +2120,7 @@ def FeatureISAVersion12_50 : FeatureSet<
Feature45BitNumRecordsBufferResource,
FeatureSupportsXNACK,
FeatureXNACK,
- FeatureCluster,
+ FeatureClusters,
]>;
def FeatureISAVersion12_51 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 848d9a5..557d87f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5043,6 +5043,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mfma_i32_16x16x64_i8:
case Intrinsic::amdgcn_mfma_i32_32x32x32_i8:
case Intrinsic::amdgcn_mfma_f32_16x16x32_bf16: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned MinNumRegsRequired = DstSize / 32;
+
// Default for MAI intrinsics.
// srcC can also be an immediate which can be folded later.
// FIXME: Should we eventually add an alternative mapping with AGPR src
@@ -5051,29 +5054,32 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// vdst, srcA, srcB, srcC
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
OpdsMapping[0] =
- Info->mayNeedAGPRs()
+ Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] =
- Info->mayNeedAGPRs()
+ Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ unsigned MinNumRegsRequired = DstSize / 32;
+
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
OpdsMapping[0] =
- Info->mayNeedAGPRs()
+ Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] =
- Info->mayNeedAGPRs()
+ Info->getMinNumAGPRs() >= MinNumRegsRequired
? getAGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI)
: getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index a67a7be..d0c0822 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1944,6 +1944,7 @@ public:
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
+ void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
bool parseDimId(unsigned &Encoding);
ParseStatus parseDim(OperandVector &Operands);
@@ -9239,6 +9240,33 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
MCOI::OperandConstraint::TIED_TO) == -1;
}
+void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
+ unsigned Opc = Inst.getOpcode();
+ constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2};
+ constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers};
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+ if (OpIdx == -1)
+ // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
+ // no src1. So continue instead of break.
+ continue;
+
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+ uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
+
+ if ((OpSel & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_0;
+ // op_sel[3] is encoded in src0_modifiers.
+ if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
+ ModVal |= SISrcMods::DST_OP_SEL;
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+}
+
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
OptionalImmIndexMap OptionalIdx;
@@ -9275,6 +9303,16 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOModSI);
+
+ // Some v_interp instructions use op_sel[3] for dst.
+ if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyOpSel);
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+
+ cvtOpSelHelper(Inst, OpSel);
+ }
}
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
@@ -9310,31 +9348,10 @@ void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
if (OpSelIdx == -1)
return;
- const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
- AMDGPU::OpName::src2};
- const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
- AMDGPU::OpName::src1_modifiers,
- AMDGPU::OpName::src2_modifiers};
-
unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
-
- for (int J = 0; J < 3; ++J) {
- int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
- if (OpIdx == -1)
- break;
-
- int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
- uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
-
- if ((OpSel & (1 << J)) != 0)
- ModVal |= SISrcMods::OP_SEL_0;
- if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
- (OpSel & (1 << 3)) != 0)
- ModVal |= SISrcMods::DST_OP_SEL;
-
- Inst.getOperand(ModIdx).setImm(ModVal);
- }
+ cvtOpSelHelper(Inst, OpSel);
}
+
void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 7b94ea3..f291e37 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -541,7 +541,7 @@ unsigned GCNSubtarget::getMaxNumSGPRs(const Function &F) const {
unsigned GCNSubtarget::getBaseMaxNumVGPRs(
const Function &F, std::pair<unsigned, unsigned> NumVGPRBounds) const {
- const auto &[Min, Max] = NumVGPRBounds;
+ const auto [Min, Max] = NumVGPRBounds;
// Check if maximum number of VGPRs was explicitly requested using
// "amdgpu-num-vgpr" attribute.
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 879bf5a..c2e6078 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -288,7 +288,7 @@ protected:
bool Has45BitNumRecordsBufferResource = false;
- bool HasCluster = false;
+ bool HasClusters = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1839,7 +1839,7 @@ public:
}
/// \returns true if the subtarget supports clusters of workgroups.
- bool hasClusters() const { return HasCluster; }
+ bool hasClusters() const { return HasClusters; }
/// \returns true if the subtarget requires a wait for xcnt before atomic
/// flat/global stores & rmw.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index d3b5718..3563caa 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1280,6 +1280,17 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
(ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue;
}
+ // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but no
+ // src1.
+ if (NumOps == 1 && AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src2) &&
+ !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1)) {
+ Ops[NumOps++] = DefaultValue; // Set src1_modifiers to default.
+ int Mod2Idx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
+ assert(Mod2Idx != -1);
+ Ops[NumOps++] = MI->getOperand(Mod2Idx).getImm();
+ }
+
const bool HasDst =
(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1) ||
(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst) != -1);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e233457..1a686a9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -17346,74 +17346,24 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
MachineFunction *MF = MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
if (TII->isVOP3(MI.getOpcode())) {
// Make sure constant bus requirements are respected.
TII->legalizeOperandsVOP3(MRI, MI);
- // Prefer VGPRs over AGPRs in mAI instructions where possible.
- // This saves a chain-copy of registers and better balance register
- // use between vgpr and agpr as agpr tuples tend to be big.
- if (!MI.getDesc().operands().empty()) {
- unsigned Opc = MI.getOpcode();
- bool HasAGPRs = Info->mayNeedAGPRs();
- const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
- int16_t Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
- for (auto I :
- {AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), Src2Idx}) {
- if (I == -1)
- break;
- if ((I == Src2Idx) && (HasAGPRs))
- break;
- MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !Op.getReg().isVirtual())
- continue;
- auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
- if (!TRI->hasAGPRs(RC))
- continue;
- auto *Src = MRI.getUniqueVRegDef(Op.getReg());
- if (!Src || !Src->isCopy() ||
- !TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
- continue;
- auto *NewRC = TRI->getEquivalentVGPRClass(RC);
- // All uses of agpr64 and agpr32 can also accept vgpr except for
- // v_accvgpr_read, but we do not produce agpr reads during selection,
- // so no use checks are needed.
- MRI.setRegClass(Op.getReg(), NewRC);
- }
-
- if (TII->isMAI(MI)) {
- // The ordinary src0, src1, src2 were legalized above.
- //
- // We have to also legalize the appended v_mfma_ld_scale_b32 operands,
- // as a separate instruction.
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::scale_src0);
- if (Src0Idx != -1) {
- int Src1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::scale_src1);
- if (TII->usesConstantBus(MRI, MI, Src0Idx) &&
- TII->usesConstantBus(MRI, MI, Src1Idx))
- TII->legalizeOpWithMove(MI, Src1Idx);
- }
- }
-
- if (!HasAGPRs)
- return;
-
- // Resolve the rest of AV operands to AGPRs.
- if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
- if (Src2->isReg() && Src2->getReg().isVirtual()) {
- auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
- if (TRI->isVectorSuperClass(RC)) {
- auto *NewRC = TRI->getEquivalentAGPRClass(RC);
- MRI.setRegClass(Src2->getReg(), NewRC);
- if (Src2->isTied())
- MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
- }
- }
+ if (TII->isMAI(MI)) {
+ // The ordinary src0, src1, src2 were legalized above.
+ //
+ // We have to also legalize the appended v_mfma_ld_scale_b32 operands,
+ // as a separate instruction.
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::scale_src0);
+ if (Src0Idx != -1) {
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::scale_src1);
+ if (TII->usesConstantBus(MRI, MI, Src0Idx) &&
+ TII->usesConstantBus(MRI, MI, Src1Idx))
+ TII->legalizeOpWithMove(MI, Src1Idx);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 908d856..b398db4 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -33,17 +33,20 @@ using namespace llvm;
// optimal RC for Opc and Dest of MFMA. In particular, there are high RP cases
// where it is better to produce the VGPR form (e.g. if there are VGPR users
// of the MFMA result).
-static cl::opt<bool> MFMAVGPRForm(
- "amdgpu-mfma-vgpr-form", cl::Hidden,
+static cl::opt<bool, true> MFMAVGPRFormOpt(
+ "amdgpu-mfma-vgpr-form",
cl::desc("Whether to force use VGPR for Opc and Dest of MFMA. If "
"unspecified, default to compiler heuristics"),
- cl::init(false));
+ cl::location(SIMachineFunctionInfo::MFMAVGPRForm), cl::init(false),
+ cl::Hidden);
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
const SITargetLowering *TLI = STI->getTargetLowering();
return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}
+bool SIMachineFunctionInfo::MFMAVGPRForm = false;
+
SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
const GCNSubtarget *STI)
: AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
@@ -81,14 +84,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
}
- MayNeedAGPRs = ST.hasMAIInsts();
if (ST.hasGFX90AInsts()) {
- // FIXME: MayNeedAGPRs is a misnomer for how this is used. MFMA selection
- // should be separated from availability of AGPRs
- if (MFMAVGPRForm ||
- (ST.getMaxNumVGPRs(F) <= ST.getAddressableNumArchVGPRs() &&
- !mayUseAGPRs(F)))
- MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
+ // FIXME: Extract logic out of getMaxNumVectorRegs; we need to apply the
+ // allocation granule and clamping.
+ auto [MinNumAGPRAttr, MaxNumAGPRAttr] =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-agpr-alloc", {~0u, ~0u},
+ /*OnlyFirstRequired=*/true);
+ MinNumAGPRs = MinNumAGPRAttr;
}
if (AMDGPU::isChainCC(CC)) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4560615..b7dbb59 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -509,7 +509,9 @@ private:
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
- bool MayNeedAGPRs : 1;
+ /// Minimum number of AGPRs required to allocate in the function. Only
+ /// relevant for gfx90a-gfx950. For gfx908, this should be infinite.
+ unsigned MinNumAGPRs = ~0u;
// The hard-wired high half of the address of the global information table
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
@@ -537,6 +539,8 @@ private:
void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;
public:
+ static bool MFMAVGPRForm;
+
struct VGPRSpillToAGPR {
SmallVector<MCPhysReg, 32> Lanes;
bool FullyAllocated = false;
@@ -1196,9 +1200,7 @@ public:
unsigned getMaxMemoryClusterDWords() const { return MaxMemoryClusterDWords; }
- bool mayNeedAGPRs() const {
- return MayNeedAGPRs;
- }
+ unsigned getMinNumAGPRs() const { return MinNumAGPRs; }
// \returns true if a function has a use of AGPRs via inline asm or
// has a call which may use it.
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 3c2dd42..3115579 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1118,12 +1118,7 @@ SIRegisterInfo::getPointerRegClass(unsigned Kind) const {
const TargetRegisterClass *
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
- if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
- return getEquivalentVGPRClass(RC);
- if (RC == &AMDGPU::SCC_CLASSRegClass)
- return getWaveMaskRegClass();
-
- return RC;
+ return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;
}
static unsigned getNumSubRegsForSpillOp(const MachineInstr &MI,
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 4a2b54d..42ec8ba 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -97,6 +97,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
VOP3_Pseudo<OpName, P, pattern> {
let AsmMatchConverter = "cvtVOP3Interp";
let mayRaiseFPException = 0;
+ let VOP3_OPSEL = P.HasOpSel;
}
def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
@@ -119,16 +120,17 @@ def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let HasSrc0Mods = 0;
}
-class getInterp16Asm <bit HasSrc2, bit HasOMod> {
+class getInterp16Asm <bit HasSrc2, bit HasOMod, bit OpSel> {
string src2 = !if(HasSrc2, ", $src2_modifiers", "");
string omod = !if(HasOMod, "$omod", "");
+ string opsel = !if(OpSel, "$op_sel", "");
string ret =
- " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod;
+ " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod#opsel;
}
class getInterp16Ins <bit HasSrc2, bit HasOMod,
- Operand Src0Mod, Operand Src2Mod> {
- dag ret = !if(HasSrc2,
+ Operand Src0Mod, Operand Src2Mod, bit OpSel> {
+ dag ret1 = !if(HasSrc2,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
InterpAttr:$attr, InterpAttrChan:$attrchan,
@@ -143,19 +145,22 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
InterpAttr:$attr, InterpAttrChan:$attrchan,
highmod:$high, Clamp0:$clamp, omod0:$omod)
);
+ dag ret2 = !if(OpSel, (ins op_sel0:$op_sel), (ins));
+ dag ret = !con(ret1, ret2);
}
-class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
+class VOP3_INTERP16 <list<ValueType> ArgVT, bit OpSel = 0> : VOPProfile<ArgVT> {
let IsSingle = 1;
let HasOMod = !ne(DstVT.Value, f16.Value);
let HasHigh = 1;
+ let HasOpSel = OpSel;
let Src0Mod = FPVRegInputMods;
let Src2Mod = FPVRegInputMods;
let Outs64 = (outs DstRC.RegClass:$vdst);
- let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret;
- let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret;
+ let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod, OpSel>.ret;
+ let Asm64 = getInterp16Asm<HasSrc2, HasOMod, OpSel>.ret;
}
//===----------------------------------------------------------------------===//
@@ -480,7 +485,7 @@ let SubtargetPredicate = isGFX9Plus in {
defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
let OtherPredicates = [isNotGFX90APlus] in
-def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
+def V_INTERP_P2_F16_opsel : VOP3Interp <"v_interp_p2_f16_opsel", VOP3_INTERP16<[f16, f32, i32, f32], /*OpSel*/ 1>>;
} // End SubtargetPredicate = isGFX9Plus
// This predicate should only apply to the selection pattern. The
@@ -2676,6 +2681,14 @@ multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName>
}
}
+multiclass VOP3Interp_F16_OpSel_Real_gfx9<bits<10> op, string OpName, string AsmName> {
+ def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
+ VOP3Interp_OpSel_gfx9 <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
+ VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
+ let AsmString = AsmName # ps.AsmOperands;
+ }
+}
+
multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
@@ -2788,7 +2801,7 @@ defm V_MAD_U16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
defm V_MAD_I16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
defm V_FMA_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9 : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
-defm V_INTERP_P2_F16_gfx9 : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;
+defm V_INTERP_P2_F16_opsel : VOP3Interp_F16_OpSel_Real_gfx9 <0x277, "V_INTERP_P2_F16_opsel", "v_interp_p2_f16">;
defm V_ADD_I32 : VOP3_Real_vi <0x29c>;
defm V_SUB_I32 : VOP3_Real_vi <0x29d>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 5daf860..3a0cc35 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -67,7 +67,7 @@ class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
class VOP3P_Mix_Profile_t16<VOPProfile P, VOP3Features Features = VOP3_REGULAR>
: VOP3P_Mix_Profile<P, Features, 0> {
let IsTrue16 = 1;
- let IsRealTrue16 = 1;
+ let IsRealTrue16 = 1;
let DstRC64 = getVALUDstForVT<P.DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
}
@@ -950,7 +950,7 @@ class MFMA_F8F6F4_WithSizeTable_Helper<VOP3_Pseudo ps, string F8F8Op> :
}
// Currently assumes scaled instructions never have abid
-class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled = false> : PatFrag <
+class MAIFrag<SDPatternOperator Op, bit HasAbid = true, bit Scaled = false> : PatFrag <
!if(Scaled, (ops node:$src0, node:$src1, node:$src2, node:$cbsz, node:$blgp,
node:$src0_modifiers, node:$scale_src0,
node:$src1_modifiers, node:$scale_src1),
@@ -959,37 +959,30 @@ class MAIFrag<SDPatternOperator Op, code pred, bit HasAbid = true, bit Scaled =
(ops node:$blgp))),
!if(Scaled, (Op $src0, $src1, $src2, $cbsz, $blgp, $src0_modifiers, $scale_src0, $src1_modifiers, $scale_src1),
!if(HasAbid, (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
- (Op $src0, $src1, $src2, $cbsz, $blgp))),
- pred
->;
-
-defvar MayNeedAGPRs = [{
- return MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
-}];
-
-defvar MayNeedAGPRs_gisel = [{
- return MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
-}];
+ (Op $src0, $src1, $src2, $cbsz, $blgp)))>;
-defvar MayNotNeedAGPRs = [{
- return !MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
-}];
+class CanUseAGPR_MAI<ValueType vt> {
+ code PredicateCode = [{
+ return !Subtarget->hasGFX90AInsts() ||
+ (!SIMachineFunctionInfo::MFMAVGPRForm &&
+ MF->getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
+ }] # !srl(vt.Size, 5) # ");";
-defvar MayNotNeedAGPRs_gisel = [{
- return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs();
-}];
+ code GISelPredicateCode = [{
+ return !Subtarget->hasGFX90AInsts() ||
+ (!SIMachineFunctionInfo::MFMAVGPRForm &&
+ MF.getInfo<SIMachineFunctionInfo>()->getMinNumAGPRs() >=
+ }] # !srl(vt.Size, 5) # ");";
+}
-class AgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
+class AgprMAIFrag<SDPatternOperator Op, ValueType vt, bit HasAbid = true,
bit Scaled = false> :
- MAIFrag<Op, MayNeedAGPRs, HasAbid, Scaled> {
- let GISelPredicateCode = MayNeedAGPRs_gisel;
-}
+ MAIFrag<Op, HasAbid, Scaled>,
+ CanUseAGPR_MAI<vt>;
class VgprMAIFrag<SDPatternOperator Op, bit HasAbid = true,
- bit Scaled = false> :
- MAIFrag<Op, MayNotNeedAGPRs, HasAbid, Scaled> {
- let GISelPredicateCode = MayNotNeedAGPRs_gisel;
-}
+ bit Scaled = false> :
+ MAIFrag<Op, HasAbid, Scaled>;
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;
@@ -1037,16 +1030,19 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
bit HasAbid = true,
bit Scaled = false> {
defvar NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap;
+ defvar ProfileAGPR = !cast<VOPProfileMAI>("VOPProfileMAI_" # P);
+ defvar ProfileVGPR = !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD");
+
let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
- def _e64 : MAIInst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
- !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
+ def _e64 : MAIInst<OpName, ProfileAGPR,
+ !if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, AgprMAIFrag<node, ProfileAGPR.DstVT, HasAbid, Scaled>), Scaled>,
MFMATable<0, "AGPR", NAME # "_e64">;
let OtherPredicates = [isGFX90APlus], Mnemonic = OpName in
- def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
+ def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", ProfileVGPR,
!if(!or(NoDstOverlap, !eq(node, null_frag)), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
MFMATable<0, "VGPR", NAME # "_vgprcd_e64", NAME # "_e64">;
}
@@ -1055,12 +1051,12 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node = null_frag,
let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
isConvertibleToThreeAddress = NoDstOverlap,
Mnemonic = OpName in {
- def "_mac_e64" : MAIInst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
- !if(!eq(node, null_frag), null_frag, AgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
+ def "_mac_e64" : MAIInst<OpName # "_mac", ProfileAGPR,
+ !if(!eq(node, null_frag), null_frag, AgprMAIFrag<node, ProfileAGPR.DstVT, HasAbid, Scaled>), Scaled>,
MFMATable<1, "AGPR", NAME # "_e64", NAME # "_mac_e64">;
let OtherPredicates = [isGFX90APlus] in
- def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD"),
+ def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", ProfileVGPR,
!if(!eq(node, null_frag), null_frag, VgprMAIFrag<node, HasAbid, Scaled>), Scaled>,
MFMATable<1, "VGPR", NAME # "_vgprcd_e64", NAME # "_mac_e64">;
}
@@ -1074,11 +1070,11 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
defvar UnscaledOpName = UnscaledOpName_#VariantSuffix;
defvar HasAbid = false;
-
- defvar NoDstOverlap = !cast<VOPProfileMAI>(!cast<MAIInst>(UnscaledOpName#"_e64").Pfl).NoDstOverlap;
+ defvar Profile = !cast<VOPProfileMAI>(!cast<MAIInst>(UnscaledOpName#"_e64").Pfl);
+ defvar NoDstOverlap = Profile.NoDstOverlap;
def _e64 : ScaledMAIInst<OpName,
- !cast<MAIInst>(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag<node, HasAbid, true>)>,
+ !cast<MAIInst>(UnscaledOpName#"_e64"), !if(NoDstOverlap, null_frag, AgprMAIFrag<node, Profile.DstVT, HasAbid, true>)>,
MFMATable<0, "AGPR", NAME # "_e64">;
def _vgprcd_e64 : ScaledMAIInst<OpName # "_vgprcd",
@@ -1090,7 +1086,7 @@ multiclass ScaledMAIInst_mc<string OpName, string UnscaledOpName_, SDPatternOper
isConvertibleToThreeAddress = NoDstOverlap,
Mnemonic = UnscaledOpName_ in {
def _mac_e64 : ScaledMAIInst<OpName # "_mac",
- !cast<MAIInst>(UnscaledOpName # "_mac_e64"), AgprMAIFrag<node, HasAbid, true>>,
+ !cast<MAIInst>(UnscaledOpName # "_mac_e64"), AgprMAIFrag<node, Profile.DstVT, HasAbid, true>>,
MFMATable<1, "AGPR", NAME # "_e64">;
def _mac_vgprcd_e64 : ScaledMAIInst<OpName # " _mac_vgprcd",
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 631f0f3..8325c62 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -419,6 +419,13 @@ class VOP3a_ScaleSel_gfx1250<bits<10> op, VOPProfile p> : VOP3e_gfx11_gfx12<op,
let Inst{14-11} = scale_sel;
}
+class VOP3Interp_OpSel_gfx9<bits<10> op, VOPProfile p> : VOP3Interp_vi<op, p> {
+ let Inst{11} = src0_modifiers{2};
+ // There's no src1
+ let Inst{13} = src2_modifiers{2};
+ let Inst{14} = !if(p.HasDst, src0_modifiers{3}, 0);
+}
+
class VOP3Interp_gfx10<bits<10> op, VOPProfile p> : VOP3e_gfx10<op, p> {
bits<6> attr;
bits<2> attrchan;
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 1fc475d..561a9c5 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -349,32 +349,30 @@ public:
bool isImm() const override {
return Kind == Immediate || Kind == Expression;
}
- bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
- bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
- bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
- bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
- bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
- bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
- bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
- bool isU6ImmX2() const { return Kind == Immediate &&
- isUInt<6>(getImm()) &&
- (getImm() & 1) == 0; }
- bool isU7Imm() const { return Kind == Immediate && isUInt<7>(getImm()); }
- bool isU7ImmX4() const { return Kind == Immediate &&
- isUInt<7>(getImm()) &&
- (getImm() & 3) == 0; }
- bool isU8Imm() const { return Kind == Immediate && isUInt<8>(getImm()); }
- bool isU8ImmX8() const { return Kind == Immediate &&
- isUInt<8>(getImm()) &&
- (getImm() & 7) == 0; }
-
- bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); }
- bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); }
+
+ template <uint64_t N> bool isUImm() const {
+ return Kind == Immediate && isUInt<N>(getImm());
+ }
+ template <uint64_t N> bool isSImm() const {
+ return Kind == Immediate && isInt<N>(getImm());
+ }
+ bool isU6ImmX2() const { return isUImm<6>() && (getImm() & 1) == 0; }
+ bool isU7ImmX4() const { return isUImm<7>() && (getImm() & 3) == 0; }
+ bool isU8ImmX8() const { return isUImm<8>() && (getImm() & 7) == 0; }
+
bool isU16Imm() const { return isExtImm<16>(/*Signed*/ false, 1); }
bool isS16Imm() const { return isExtImm<16>(/*Signed*/ true, 1); }
bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); }
bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); }
bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); }
+ bool isS34Imm() const {
+ // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit
+ // ContextImmediate is needed.
+ return Kind == Expression || isSImm<34>();
+ }
+ bool isS34ImmX16() const {
+ return Kind == Expression || (isSImm<34>() && (getImm() & 15) == 0);
+ }
bool isHashImmX8() const {
// The Hash Imm form is used for instructions that check or store a hash.
@@ -384,16 +382,6 @@ public:
(getImm() & 7) == 0);
}
- bool isS34ImmX16() const {
- return Kind == Expression ||
- (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0);
- }
- bool isS34Imm() const {
- // Once the PC-Rel ABI is finalized, evaluate whether a 34-bit
- // ContextImmediate is needed.
- return Kind == Expression || (Kind == Immediate && isInt<34>(getImm()));
- }
-
bool isTLSReg() const { return Kind == TLSRegister; }
bool isDirectBr() const {
if (Kind == Expression)
@@ -1637,7 +1625,7 @@ bool PPCAsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Operands.size() != 5)
return false;
PPCOperand &EHOp = (PPCOperand &)*Operands[4];
- if (EHOp.isU1Imm() && EHOp.getImm() == 0)
+ if (EHOp.isUImm<1>() && EHOp.getImm() == 0)
Operands.pop_back();
}
@@ -1817,7 +1805,7 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
- if (Op.isU3Imm() && Op.getImm() == ImmVal)
+ if (Op.isUImm<3>() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 48c31c9..81d8e94 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -206,45 +206,24 @@ PPCMCCodeEmitter::getVSRpEvenEncoding(const MCInst &MI, unsigned OpNo,
return RegBits;
}
-unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpNo);
- if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
-
- // Add a fixup for the immediate field.
- addFixup(Fixups, IsLittleEndian ? 0 : 2, MO.getExpr(), PPC::fixup_ppc_half16);
- return 0;
-}
-
-uint64_t PPCMCCodeEmitter::getImm34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI,
- MCFixupKind Fixup) const {
+template <MCFixupKind Fixup>
+uint64_t PPCMCCodeEmitter::getImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
assert(!MO.isReg() && "Not expecting a register for this operand.");
if (MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
+ uint32_t Offset = 0;
+ if (Fixup == PPC::fixup_ppc_half16)
+ Offset = IsLittleEndian ? 0 : 2;
+
// Add a fixup for the immediate field.
- addFixup(Fixups, 0, MO.getExpr(), Fixup);
+ addFixup(Fixups, Offset, MO.getExpr(), Fixup);
return 0;
}
-uint64_t
-PPCMCCodeEmitter::getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_imm34);
-}
-
-uint64_t
-PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return getImm34Encoding(MI, OpNo, Fixups, STI, PPC::fixup_ppc_pcrel34);
-}
-
unsigned PPCMCCodeEmitter::getDispRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index b574557..3356513 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -47,19 +47,10 @@ public:
unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getImm34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI,
- MCFixupKind Fixup) const;
- uint64_t getImm34EncodingNoPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ template <MCFixupKind Fixup>
+ uint64_t getImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getDispRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 60efa4c..fdca5ebc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -14,30 +14,6 @@
//===----------------------------------------------------------------------===//
// 64-bit operands.
//
-def s16imm64 : Operand<i64> {
- let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCS16ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def u16imm64 : Operand<i64> {
- let PrintMethod = "printU16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCU16ImmAsmOperand;
- let DecoderMethod = "decodeUImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
-def s17imm64 : Operand<i64> {
- // This operand type is used for addis/lis to allow the assembler parser
- // to accept immediates in the range -65536..65535 for compatibility with
- // the GNU assembler. The operand is treated as 16-bit otherwise.
- let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
- let ParserMatchClass = PPCS17ImmAsmOperand;
- let DecoderMethod = "decodeSImmOperand<16>";
- let OperandType = "OPERAND_IMMEDIATE";
-}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index c616db4..23d6d88 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -30,6 +30,11 @@
// Altivec transformation functions and pattern fragments.
//
+// fneg is not legal, and desugared as an xor.
+def desugared_fneg : PatFrag<(ops node:$x), (v4f32 (bitconvert (xor (bitconvert $x),
+ (int_ppc_altivec_vslw (bitconvert (v16i8 immAllOnesV)),
+ (bitconvert (v16i8 immAllOnesV))))))>;
+
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
@@ -467,11 +472,12 @@ def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
[(set v4f32:$RT,
(fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>;
-// FIXME: The fma+fneg pattern won't match because fneg is not legal.
+// fneg is not legal, hence we have to match on the desugared version.
def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
"vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP,
- [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC,
- (fneg v4f32:$RB))))]>;
+ [(set v4f32:$RT, (desugared_fneg (fma v4f32:$RA, v4f32:$RC,
+ (desugared_fneg v4f32:$RB))))]>;
+
let hasSideEffects = 1 in {
def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
@@ -892,6 +898,13 @@ def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
// Add
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
+
+// Fused negated multiply-subtract
+def : Pat<(v4f32 (desugared_fneg
+ (int_ppc_altivec_vmaddfp v4f32:$RA, v4f32:$RC,
+ (desugared_fneg v4f32:$RB)))),
+ (VNMSUBFP $RA, $RC, $RB)>;
+
// Saturating adds/subtracts.
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 6d8c122..65d0484 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -615,7 +615,8 @@ def spe4rc : RegisterOperand<GPRC> {
}
def PPCU1ImmAsmOperand : AsmOperandClass {
- let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let Name = "U1Imm";
+ let PredicateMethod = "isUImm<1>";
let RenderMethod = "addImmOperands";
}
def u1imm : Operand<i32> {
@@ -626,7 +627,8 @@ def u1imm : Operand<i32> {
}
def PPCU2ImmAsmOperand : AsmOperandClass {
- let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let Name = "U2Imm";
+ let PredicateMethod = "isUImm<2>";
let RenderMethod = "addImmOperands";
}
def u2imm : Operand<i32> {
@@ -647,7 +649,8 @@ def atimm : Operand<i32> {
}
def PPCU3ImmAsmOperand : AsmOperandClass {
- let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let Name = "U3Imm";
+ let PredicateMethod = "isUImm<3>";
let RenderMethod = "addImmOperands";
}
def u3imm : Operand<i32> {
@@ -658,7 +661,8 @@ def u3imm : Operand<i32> {
}
def PPCU4ImmAsmOperand : AsmOperandClass {
- let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
+ let Name = "U4Imm";
+ let PredicateMethod = "isUImm<4>";
let RenderMethod = "addImmOperands";
}
def u4imm : Operand<i32> {
@@ -668,7 +672,8 @@ def u4imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS5ImmAsmOperand : AsmOperandClass {
- let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+ let Name = "S5Imm";
+ let PredicateMethod = "isSImm<5>";
let RenderMethod = "addImmOperands";
}
def s5imm : Operand<i32> {
@@ -678,7 +683,8 @@ def s5imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU5ImmAsmOperand : AsmOperandClass {
- let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+ let Name = "U5Imm";
+ let PredicateMethod = "isUImm<5>";
let RenderMethod = "addImmOperands";
}
def u5imm : Operand<i32> {
@@ -688,7 +694,8 @@ def u5imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU6ImmAsmOperand : AsmOperandClass {
- let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+ let Name = "U6Imm";
+ let PredicateMethod = "isUImm<6>";
let RenderMethod = "addImmOperands";
}
def u6imm : Operand<i32> {
@@ -698,7 +705,8 @@ def u6imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU7ImmAsmOperand : AsmOperandClass {
- let Name = "U7Imm"; let PredicateMethod = "isU7Imm";
+ let Name = "U7Imm";
+ let PredicateMethod = "isUImm<7>";
let RenderMethod = "addImmOperands";
}
def u7imm : Operand<i32> {
@@ -708,7 +716,8 @@ def u7imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU8ImmAsmOperand : AsmOperandClass {
- let Name = "U8Imm"; let PredicateMethod = "isU8Imm";
+ let Name = "U8Imm";
+ let PredicateMethod = "isUImm<8>";
let RenderMethod = "addImmOperands";
}
def u8imm : Operand<i32> {
@@ -718,7 +727,8 @@ def u8imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU10ImmAsmOperand : AsmOperandClass {
- let Name = "U10Imm"; let PredicateMethod = "isU10Imm";
+ let Name = "U10Imm";
+ let PredicateMethod = "isUImm<10>";
let RenderMethod = "addImmOperands";
}
def u10imm : Operand<i32> {
@@ -728,7 +738,8 @@ def u10imm : Operand<i32> {
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCU12ImmAsmOperand : AsmOperandClass {
- let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+ let Name = "U12Imm";
+ let PredicateMethod = "isUImm<12>";
let RenderMethod = "addImmOperands";
}
def u12imm : Operand<i32> {
@@ -743,7 +754,14 @@ def PPCS16ImmAsmOperand : AsmOperandClass {
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
let ParserMatchClass = PPCS16ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
let OperandType = "OPERAND_IMMEDIATE";
@@ -754,7 +772,14 @@ def PPCU16ImmAsmOperand : AsmOperandClass {
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
let ParserMatchClass = PPCU16ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<16>";
let OperandType = "OPERAND_IMMEDIATE";
@@ -768,7 +793,17 @@ def s17imm : Operand<i32> {
// to accept immediates in the range -65536..65535 for compatibility with
// the GNU assembler. The operand is treated as 16-bit otherwise.
let PrintMethod = "printS16ImmOperand";
- let EncoderMethod = "getImm16Encoding";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def s17imm64 : Operand<i64> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_half16>";
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
let OperandType = "OPERAND_IMMEDIATE";
@@ -780,14 +815,14 @@ def PPCS34ImmAsmOperand : AsmOperandClass {
}
def s34imm : Operand<i64> {
let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34EncodingNoPCRel";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_imm34>";
let ParserMatchClass = PPCS34ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<34>";
let OperandType = "OPERAND_IMMEDIATE";
}
def s34imm_pcrel : Operand<i64> {
let PrintMethod = "printS34ImmOperand";
- let EncoderMethod = "getImm34EncodingPCRel";
+ let EncoderMethod = "getImmEncoding<PPC::fixup_ppc_pcrel34>";
let ParserMatchClass = PPCS34ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<34>";
let OperandType = "OPERAND_IMMEDIATE";
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 34026ed..ecfb5fe 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -439,18 +439,6 @@ bool RISCVCallLowering::canLowerReturn(MachineFunction &MF,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs,
MF.getFunction().getContext());
- const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
-
- std::optional<unsigned> FirstMaskArgument = std::nullopt;
- // Preassign the first mask argument.
- if (Subtarget.hasVInstructions()) {
- for (const auto &ArgIdx : enumerate(Outs)) {
- MVT ArgVT = MVT::getVT(ArgIdx.value().Ty);
- if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
- FirstMaskArgument = ArgIdx.index();
- }
- }
-
for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
MVT VT = MVT::getVT(Outs[I].Ty);
if (CC_RISCV(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo,
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 597dd12..9f9ae2f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -324,6 +324,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = GPRValueMapping;
+ // Atomics always use GPR destinations. Don't refine any further.
+ if (cast<GLoad>(MI).isAtomic())
+ break;
+
// Use FPR64 for s64 loads on rv32.
if (GPRSize == 32 && Size.getFixedValue() == 64) {
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
@@ -358,6 +362,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = GPRValueMapping;
+ // Atomics always use GPR sources. Don't refine any further.
+ if (cast<GStore>(MI).isAtomic())
+ break;
+
// Use FPR64 for s64 stores on rv32.
if (GPRSize == 32 && Size.getFixedValue() == 64) {
assert(MF.getSubtarget<RISCVSubtarget>().hasStdExtD());
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index a02de31..27cf057 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1421,7 +1421,7 @@ def HasVendorXMIPSCMov
: Predicate<"Subtarget->hasVendorXMIPSCMov()">,
AssemblerPredicate<(all_of FeatureVendorXMIPSCMov),
"'Xmipscmov' ('mips.ccmov' instruction)">;
-def UseCCMovInsn : Predicate<"Subtarget->useCCMovInsn()">;
+def UseMIPSCCMovInsn : Predicate<"Subtarget->useMIPSCCMovInsn()">;
def FeatureVendorXMIPSLSP
: RISCVExtension<1, 0, "MIPS optimization for hardware load-store bonding">;
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index 7f5d0af..6d01250 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -190,3 +190,29 @@ let Predicates = [HasStdExtZbkb, NoStdExtZbb, IsRV64] in {
def : Pat<(i64 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>;
def : Pat<(i32 (zext (i16 GPR:$rs))), (PACKW GPR:$rs, (XLenVT X0))>;
}
+
+//===----------------------------------------------------------------------===//
+// Zalasr patterns not used by SelectionDAG
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZalasr] in {
+ // the sequentially consistent loads use
+ // .aq instead of .aqrl to match the psABI/A.7
+ def : PatLAQ<acquiring_load<atomic_load_aext_8>, LB_AQ, i16>;
+ def : PatLAQ<seq_cst_load<atomic_load_aext_8>, LB_AQ, i16>;
+
+ def : PatLAQ<acquiring_load<atomic_load_nonext_16>, LH_AQ, i16>;
+ def : PatLAQ<seq_cst_load<atomic_load_nonext_16>, LH_AQ, i16>;
+
+ def : PatSRL<releasing_store<atomic_store_8>, SB_RL, i16>;
+ def : PatSRL<seq_cst_store<atomic_store_8>, SB_RL, i16>;
+
+ def : PatSRL<releasing_store<atomic_store_16>, SH_RL, i16>;
+ def : PatSRL<seq_cst_store<atomic_store_16>, SH_RL, i16>;
+}
+
+let Predicates = [HasStdExtZalasr, IsRV64] in {
+ // Load pattern is in RISCVInstrInfoZalasr.td and shared with RV32.
+ def : PatSRL<releasing_store<atomic_store_32>, SW_RL, i32>;
+ def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL, i32>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dcce2d2..6234714 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -434,7 +434,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ABS, MVT::i32, Custom);
}
- if (!Subtarget.useCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
+ if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
@@ -25031,8 +25031,17 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
// Mark RVV intrinsic as supported.
- if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID()))
+ if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID())) {
+ // GISel doesn't support tuple types yet.
+ if (Inst.getType()->isRISCVVectorTupleTy())
+ return true;
+
+ for (unsigned i = 0; i < II->arg_size(); ++i)
+ if (II->getArgOperand(i)->getType()->isRISCVVectorTupleTy())
+ return true;
+
return false;
+ }
}
if (Inst.getType()->isScalableTy())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
index 115ab38e..0b5bee1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
@@ -175,7 +175,7 @@ def MIPS_CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd),
Sched<[]>;
}
-let Predicates = [UseCCMovInsn] in {
+let Predicates = [UseMIPSCCMovInsn] in {
def : Pat<(select (riscv_setne (XLenVT GPR:$rs2)),
(XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
(MIPS_CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 1dd7332..1deecd2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -93,12 +93,11 @@ let Predicates = [HasStdExtZalasr] in {
def : PatSRL<releasing_store<atomic_store_32>, SW_RL>;
def : PatSRL<seq_cst_store<atomic_store_32>, SW_RL>;
-} // Predicates = [HasStdExtZalasr]
-let Predicates = [HasStdExtZalasr, IsRV32] in {
- def : PatLAQ<acquiring_load<atomic_load_nonext_32>, LW_AQ>;
- def : PatLAQ<seq_cst_load<atomic_load_nonext_32>, LW_AQ>;
-} // Predicates = [HasStdExtZalasr, IsRV32]
+ // Used by GISel for RV32 and RV64.
+ def : PatLAQ<acquiring_load<atomic_load_nonext_32>, LW_AQ, i32>;
+ def : PatLAQ<seq_cst_load<atomic_load_nonext_32>, LW_AQ, i32>;
+} // Predicates = [HasStdExtZalasr]
let Predicates = [HasStdExtZalasr, IsRV64] in {
def : PatLAQ<acquiring_load<atomic_load_asext_32>, LW_AQ, i64>;
diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
index c81a20b..115a96e 100644
--- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp
@@ -92,7 +92,7 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
- if (!Subtarget.useLoadStorePairs())
+ if (!Subtarget.useMIPSLoadStorePairs())
return false;
bool MadeChange = false;
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index e35ffaf..715ac4c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -65,9 +65,9 @@ static cl::opt<bool> UseMIPSLoadStorePairsOpt(
cl::desc("Enable the load/store pair optimization pass"), cl::init(false),
cl::Hidden);
-static cl::opt<bool> UseCCMovInsn("use-riscv-ccmov",
- cl::desc("Use 'mips.ccmov' instruction"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool> UseMIPSCCMovInsn("use-riscv-mips-ccmov",
+ cl::desc("Use 'mips.ccmov' instruction"),
+ cl::init(true), cl::Hidden);
void RISCVSubtarget::anchor() {}
@@ -246,10 +246,10 @@ void RISCVSubtarget::overridePostRASchedPolicy(
}
}
-bool RISCVSubtarget::useLoadStorePairs() const {
+bool RISCVSubtarget::useMIPSLoadStorePairs() const {
return UseMIPSLoadStorePairsOpt && HasVendorXMIPSLSP;
}
-bool RISCVSubtarget::useCCMovInsn() const {
- return UseCCMovInsn && HasVendorXMIPSCMov;
+bool RISCVSubtarget::useMIPSCCMovInsn() const {
+ return UseMIPSCCMovInsn && HasVendorXMIPSCMov;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 7dffa63..6acf799 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -227,8 +227,8 @@ public:
unsigned getXLen() const {
return is64Bit() ? 64 : 32;
}
- bool useLoadStorePairs() const;
- bool useCCMovInsn() const;
+ bool useMIPSLoadStorePairs() const;
+ bool useMIPSCCMovInsn() const;
unsigned getFLen() const {
if (HasStdExtD)
return 64;
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 9f2e075..e16c8f0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -2811,9 +2811,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
GetElementPtrInst *NewGEP = simplifyZeroLengthArrayGepInst(Ref);
if (NewGEP) {
Ref->replaceAllUsesWith(NewGEP);
- if (isInstructionTriviallyDead(Ref))
- DeadInsts.insert(Ref);
-
+ DeadInsts.insert(Ref);
Ref = NewGEP;
}
if (Type *GepTy = getGEPType(Ref))