Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 15
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 5
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.h | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp | 8
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp | 6
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td | 1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td | 1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 21
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 33
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 5
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 82
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 10
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 17
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 145
30 files changed, 404 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9ce1224..aed325c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -221,12 +221,22 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
const DebugLoc &DL = I.getDebugLoc();
MachineBasicBlock *BB = I.getParent();
-  unsigned CmpOpc =
-      STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
-  MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc))
-                          .addReg(I.getOperand(1).getReg())
-                          .addImm(0);
+  Register VCCReg = I.getOperand(1).getReg();
+  MachineInstr *Cmp;
+
+  if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    unsigned CmpOpc =
+        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
+    Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0);
+  } else {
+    // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64
+    // which sets SCC as a side effect.
+    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
+              .addReg(VCCReg)
+              .addReg(VCCReg);
+  }
if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI))
return false;
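
Note on the hunk above: both paths compute the same predicate, "VCC has any lane set", into SCC. gfx8+ can use S_CMP_LG_U64/U32 directly, while gfx7 and earlier derive it from S_OR_B64's side effect of setting SCC when the result is non-zero. A minimal scalar model of that predicate (illustrative only, not LLVM API):

```cpp
#include <cstdint>

// gfx8+: S_CMP_LG_U64/U32 vcc, 0   -> SCC = (vcc != 0)
// gfx7-: S_OR_B64 dead, vcc, vcc   -> dead = vcc, SCC = (dead != 0)
bool sccFromVcc(uint64_t vcc) { return vcc != 0; }
```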
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
index 5407566..b84c30e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
@@ -500,6 +500,16 @@ void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) {
MI.eraseFromParent();
}
+void RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) {
+ auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg());
+ auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg());
+ auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo});
+ auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi});
+ B.buildBuildVectorTrunc(MI.getOperand(0).getReg(),
+ {ResLo.getReg(0), ResHi.getReg(0)});
+ MI.eraseFromParent();
+}
+
static bool isSignedBFE(MachineInstr &MI) {
if (GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI))
return (GI->is(Intrinsic::amdgcn_sbfe));
@@ -804,6 +814,8 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
}
break;
}
+ case UnpackAExt:
+ return lowerUnpackAExt(MI);
case WidenMMOToS32:
return widenMMOToS32(cast<GAnyLoad>(MI));
}
@@ -1120,7 +1132,8 @@ void RegBankLegalizeHelper::applyMappingDst(
assert(RB == SgprRB);
Register NewDst = MRI.createVirtualRegister(SgprRB_S32);
Op.setReg(NewDst);
- B.buildTrunc(Reg, NewDst);
+ if (!MRI.use_empty(Reg))
+ B.buildTrunc(Reg, NewDst);
break;
}
case InvalidMapping: {
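
Sketch of what lowerUnpackAExt above does, written as scalar C++ over one packed 2 x i16 register pair: each operand is split into lanes, any-extended to 32 bits, operated on lane-wise, and repacked with a truncating build_vector. (Illustrative model only; the real code emits MIR, and `op` stands for the 32-bit scalar form of MI's opcode.)

```cpp
#include <cstdint>

uint32_t unpackAExtOp(uint32_t a, uint32_t b,
                      uint32_t (*op)(uint32_t, uint32_t)) {
  uint32_t resLo = op(a & 0xffff, b & 0xffff) & 0xffff; // low lanes
  uint32_t resHi = op(a >> 16, b >> 16) & 0xffff;       // high lanes
  return resLo | (resHi << 16); // G_BUILD_VECTOR_TRUNC repack
}
```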
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
index d937815..ad3ff1d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
@@ -124,6 +124,7 @@ private:
void lowerSplitTo32Select(MachineInstr &MI);
void lowerSplitTo32SExtInReg(MachineInstr &MI);
void lowerUnpackMinMax(MachineInstr &MI);
+ void lowerUnpackAExt(MachineInstr &MI);
};
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index a67b12a..01abd35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -470,7 +470,19 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
- .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
+ .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
+ .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
+ .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
+ .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
+ .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});
+
+ addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
+ .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
+ .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});
+
+ addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
+ .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
+ .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});
addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});
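
For the carry-handling rules added above: on the SGPR bank the carry in/out is modeled as a 32-bit value (Sgpr32Trunc result, Sgpr32AExtBoolInReg input), while on the VGPR bank it lives in VCC. Scalar semantics of G_UADDO/G_UADDE for reference (a sketch, not LLVM code):

```cpp
#include <cstdint>

struct AddCarry {
  uint32_t Res;
  bool CarryOut;
};

// G_UADDO: unsigned add with carry-out.
AddCarry uaddo(uint32_t A, uint32_t B) {
  uint32_t R = A + B;
  return {R, R < A}; // wrapped result implies carry
}

// G_UADDE: unsigned add with carry-in and carry-out.
AddCarry uadde(uint32_t A, uint32_t B, bool CarryIn) {
  AddCarry T = uaddo(A, B);
  uint32_t R = T.Res + (CarryIn ? 1 : 0);
  return {R, T.CarryOut || (CarryIn && R == 0)};
}
```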
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
index 93e0efd..030bd75 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
@@ -223,7 +223,8 @@ enum LoweringMethodID {
UniCstExt,
SplitLoad,
WidenLoad,
- WidenMMOToS32
+ WidenMMOToS32,
+ UnpackAExt
};
enum FastRulesTypes {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 75a94ac..b28c50e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1315,6 +1315,9 @@ void AMDGPUPassConfig::addIRPasses() {
isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
+ if (EnableUniformIntrinsicCombine)
+ addPass(createAMDGPUUniformIntrinsicCombineLegacyPass());
+
// This can be disabled by passing ::Disable here or on the command line
// with --expand-variadics-override=disable.
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
@@ -2066,6 +2069,8 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
if (isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(AMDGPUImageIntrinsicOptimizerPass(TM));
+ if (EnableUniformIntrinsicCombine)
+ addPass(AMDGPUUniformIntrinsicCombinePass());
// This can be disabled by passing ::Disable here or on the command line
// with --expand-variadics-override=disable.
addPass(ExpandVariadicsPass(ExpandVariadicsMode::Lowering));
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b34ab2a..8bb2808 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7035,9 +7035,15 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
SDLoc SL(N);
if (Src.getOpcode() == ISD::SETCC) {
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ // Need to expand bfloat to float for comparison (setcc).
+ if (Op0.getValueType() == MVT::bf16) {
+ Op0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op0);
+ Op1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op1);
+ }
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
- Src.getOperand(1), Src.getOperand(2));
+ return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Op0, Op1, Src.getOperand(2));
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
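
The bf16 expansion above is lossless because bf16 is exactly the top half of an f32, so comparing the extended f32 values gives the same result. Standalone model of the promotion (illustrative, not the DAG code):

```cpp
#include <cstdint>
#include <cstring>

// bf16 -> f32: the 16 bf16 bits become the high half of the f32 pattern.
float bf16ToF32(uint16_t B) {
  uint32_t Bits = static_cast<uint32_t>(B) << 16;
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}
```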
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a4d3d62..6b06534 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -22109,6 +22109,11 @@ bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
ScalarTy->isIntegerTy(32));
}
+ArrayRef<MCPhysReg> ARMTargetLowering::getRoundingControlRegisters() const {
+ static const MCPhysReg RCRegs[] = {ARM::FPSCR_RM};
+ return RCRegs;
+}
+
Value *ARMTargetLowering::createComplexDeinterleavingIR(
IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 357d2c5..bf3438b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -1009,6 +1009,8 @@ class VectorType;
bool isUnsupportedFloatingType(EVT VT) const;
+ ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
+
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
index 3b810d0..79863e1 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
@@ -34,7 +34,7 @@ class HexagonCopyHoisting : public MachineFunctionPass {
public:
static char ID;
- HexagonCopyHoisting() : MachineFunctionPass(ID), MFN(nullptr), MRI(nullptr) {}
+ HexagonCopyHoisting() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Hexagon Copy Hoisting"; }
@@ -56,8 +56,8 @@ public:
void moveCopyInstr(MachineBasicBlock *DestBB,
std::pair<Register, Register> Key, MachineInstr *MI);
- MachineFunction *MFN;
- MachineRegisterInfo *MRI;
+ MachineFunction *MFN = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
std::vector<DenseMap<std::pair<Register, Register>, MachineInstr *>>
CopyMIList;
};
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
index 93418f7..a10c937 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
@@ -34,13 +34,13 @@ STATISTIC(HexagonNumStoreAbsConversions,
namespace {
class HexagonGenMemAbsolute : public MachineFunctionPass {
- const HexagonInstrInfo *TII;
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
+ const HexagonInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
public:
static char ID;
- HexagonGenMemAbsolute() : MachineFunctionPass(ID), TII(0), MRI(0), TRI(0) {}
+ HexagonGenMemAbsolute() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "Hexagon Generate Load/Store Set Absolute Address Instruction";
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 1637b91..d19920c 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -612,6 +612,9 @@ let Predicates = [UseHVX] in {
(V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>;
def: Pat<(VecQ32 (trunc HVI32:$Vs)),
(V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>;
+  def: Pat<(VecQ16 (trunc HWI32:$Vss)),
+           (Combineq (VecQ32 (V6_vandvrt (HiVec $Vss), (ToI32 0x01010101))),
+                     (VecQ32 (V6_vandvrt (LoVec $Vss), (ToI32 0x01010101))))>;
}
let Predicates = [UseHVX] in {
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index b9cdd6a..ce2de75 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -544,7 +544,7 @@ int HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
if (!hasV60Ops())
return Latency;
- auto &QII = static_cast<const HexagonInstrInfo &>(*getInstrInfo());
+ const HexagonInstrInfo &QII = *getInstrInfo();
// BSB scheduling.
if (QII.isHVXVec(SrcInst) || useBSBScheduling())
Latency = (Latency + 1) >> 1;
diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
index 71bdfc66..5a85f34 100644
--- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
@@ -43,7 +43,7 @@ namespace {
class HexagonTfrCleanup : public MachineFunctionPass {
public:
static char ID;
- HexagonTfrCleanup() : MachineFunctionPass(ID), HII(0), TRI(0) {}
+ HexagonTfrCleanup() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Hexagon TFR Cleanup"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -52,8 +52,8 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- const HexagonInstrInfo *HII;
- const TargetRegisterInfo *TRI;
+ const HexagonInstrInfo *HII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
typedef DenseMap<unsigned, uint64_t> ImmediateMap;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73..e86b21c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -365,6 +365,7 @@ def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
// FP Rounding
let Predicates = [HasBasicF, IsLA64] in {
def : PatFpr<frint, FRINT_S, FPR32>;
+def : PatFpr<flog2, FLOGB_S, FPR32>;
} // Predicates = [HasBasicF, IsLA64]
let Predicates = [HasBasicF, IsLA32] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa..2e88254 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -348,6 +348,7 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>;
// FP Rounding
let Predicates = [HasBasicD, IsLA64] in {
def : PatFpr<frint, FRINT_D, FPR64>;
+def : PatFpr<flog2, FLOGB_D, FPR64>;
} // Predicates = [HasBasicD, IsLA64]
/// Pseudo-instructions needed for the soft-float ABI with LA32D
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6..a6de839 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -244,8 +244,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ }
if (!Subtarget.hasBasicD()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
@@ -291,8 +293,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f64, Legal);
+ }
}
// Set operations for 'LSX' feature.
@@ -362,6 +366,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Legal);
+ setOperationAction(ISD::FLOG2, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
@@ -443,6 +448,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Legal);
+ setOperationAction(ISD::FLOG2, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 613dea6..ca4ee5f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1593,6 +1593,9 @@ def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),
// XVFSQRT_{S/D}
defm : PatXrF<fsqrt, "XVFSQRT">;
+// XVFLOGB_{S/D}
+defm : PatXrF<flog2, "XVFLOGB">;
+
// XVRECIP_{S/D}
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
(XVFRECIP_S v8f32:$xj)>;
@@ -2024,6 +2027,24 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
(XVFTINTRZ_LU_D v4f64:$vj)),
sub_128)>;
+// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
+defm : VAvgPat<sra, "XVAVG_B", v32i8>;
+defm : VAvgPat<sra, "XVAVG_H", v16i16>;
+defm : VAvgPat<sra, "XVAVG_W", v8i32>;
+defm : VAvgPat<sra, "XVAVG_D", v4i64>;
+defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
+defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
+defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
+defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
+defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
+defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
+defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
+defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
+defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
+defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
+defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
+
// abs
def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 4619c6b..92402ba 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1518,6 +1518,18 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
}
}
+multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
+multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+ (vt (vsplat_imm_eq_1)))),
+ (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -1783,6 +1795,9 @@ def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va),
// VFSQRT_{S/D}
defm : PatVrF<fsqrt, "VFSQRT">;
+// VFLOGB_{S/D}
+defm : PatVrF<flog2, "VFLOGB">;
+
// VFRECIP_{S/D}
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),
(VFRECIP_S v4f32:$vj)>;
@@ -2154,6 +2169,24 @@ def : Pat<(f32 f32imm_vldi:$in),
def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
+// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
+defm : VAvgPat<sra, "VAVG_B", v16i8>;
+defm : VAvgPat<sra, "VAVG_H", v8i16>;
+defm : VAvgPat<sra, "VAVG_W", v4i32>;
+defm : VAvgPat<sra, "VAVG_D", v2i64>;
+defm : VAvgPat<srl, "VAVG_BU", v16i8>;
+defm : VAvgPat<srl, "VAVG_HU", v8i16>;
+defm : VAvgPat<srl, "VAVG_WU", v4i32>;
+defm : VAvgPat<srl, "VAVG_DU", v2i64>;
+defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
+defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
+defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
+defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
+defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
+defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
+defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
+defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
+
// abs
def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>;
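
Element-wise semantics behind VAvgPat/VAvgrPat, as scalar C++ for the w (i32) lane size: vavg.* computes the floor average (x + y) >> 1 and vavgr.* the rounding average (x + y + 1) >> 1, with sra matched for the signed forms and srl for the *_U unsigned forms. (Sketch only; the TableGen patterns match whole-vector add/shift trees.)

```cpp
#include <cstdint>

int32_t vavg_w(int32_t X, int32_t Y) { // floor average, signed
  return static_cast<int32_t>((static_cast<int64_t>(X) + Y) >> 1);
}

int32_t vavgr_w(int32_t X, int32_t Y) { // rounding average, signed
  return static_cast<int32_t>((static_cast<int64_t>(X) + Y + 1) >> 1);
}
```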
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 000d296..4ff489d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -296,8 +296,9 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool JIT)
- : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU,
- computeFSAdditions(FS, OL, TT), Options,
+ : CodeGenTargetMachineImpl(T,
+ TT.computeDataLayout(Options.MCOptions.ABIName),
+ TT, CPU, computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
getEffectivePPCCodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())),
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 8198173..282cf5d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -92,6 +92,10 @@ private:
void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,
MachineIRBuilder &MIB) const;
bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ void addVectorLoadStoreOperands(MachineInstr &I,
+ SmallVectorImpl<SrcOp> &SrcOps,
+ unsigned &CurOp, bool IsMasked,
+ bool IsStrided) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
@@ -716,6 +720,26 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
return GenericOpc;
}
+void RISCVInstructionSelector::addVectorLoadStoreOperands(
+ MachineInstr &I, SmallVectorImpl<SrcOp> &SrcOps, unsigned &CurOp,
+ bool IsMasked, bool IsStrided) const {
+ // Base Pointer
+ auto PtrReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PtrReg);
+
+ // Stride
+ if (IsStrided) {
+ auto StrideReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(StrideReg);
+ }
+
+ // Mask
+ if (IsMasked) {
+ auto MaskReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(MaskReg);
+ }
+}
+
bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineIRBuilder &MIB) const {
// Find the intrinsic ID.
@@ -752,21 +776,7 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
SrcOps.push_back(Register(RISCV::NoRegister));
}
- // Base Pointer
- auto PtrReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(PtrReg);
-
- // Stride
- if (IsStrided) {
- auto StrideReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(StrideReg);
- }
-
- // Mask
- if (IsMasked) {
- auto MaskReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(MaskReg);
- }
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided);
RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
const RISCV::VLEPseudo *P =
@@ -795,6 +805,48 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
}
+ case Intrinsic::riscv_vsm:
+ case Intrinsic::riscv_vse:
+ case Intrinsic::riscv_vse_mask:
+ case Intrinsic::riscv_vsse:
+ case Intrinsic::riscv_vsse_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vse_mask ||
+ IntrinID == Intrinsic::riscv_vsse_mask;
+ bool IsStrided = IntrinID == Intrinsic::riscv_vsse ||
+ IntrinID == Intrinsic::riscv_vsse_mask;
+ LLT VT = MRI->getType(I.getOperand(1).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Sources
+ unsigned CurOp = 1;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+ // Store value
+ auto PassthruReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PassthruReg);
+
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided);
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
+ IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
}
}
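
The operand order the new helper consumes is fixed: base pointer, then stride (strided forms only), then mask (masked forms only); VL and SEW are appended by the caller afterwards. A standalone model of the collection logic (illustrative; the real code walks MachineInstr operands into SrcOp):

```cpp
#include <vector>

// Mirrors addVectorLoadStoreOperands: CurOp advances past each operand
// that is present; SrcOps receives them in a fixed order.
void collectVecMemOps(const std::vector<int> &Ops, unsigned &CurOp,
                      bool IsMasked, bool IsStrided,
                      std::vector<int> &SrcOps) {
  SrcOps.push_back(Ops[CurOp++]); // base pointer
  if (IsStrided)
    SrcOps.push_back(Ops[CurOp++]); // stride
  if (IsMasked)
    SrcOps.push_back(Ops[CurOp++]); // mask
}
```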
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 4105618..526675a 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -127,6 +127,10 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCAND:
case RISCV::PseudoCCOR:
case RISCV::PseudoCCXOR:
+ case RISCV::PseudoCCMAX:
+ case RISCV::PseudoCCMAXU:
+ case RISCV::PseudoCCMIN:
+ case RISCV::PseudoCCMINU:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
case RISCV::PseudoCCSLL:
@@ -217,6 +221,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
.addImm(0);
} else {
unsigned NewOpc;
+ // clang-format off
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
@@ -228,6 +233,10 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break;
case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break;
case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break;
+ case RISCV::PseudoCCMAX: NewOpc = RISCV::MAX; break;
+ case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break;
+ case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break;
+ case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
@@ -250,6 +259,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCNDS_BFOS: NewOpc = RISCV::NDS_BFOS; break;
case RISCV::PseudoCCNDS_BFOZ: NewOpc = RISCV::NDS_BFOZ; break;
}
+ // clang-format on
if (NewOpc == RISCV::NDS_BFOZ || NewOpc == RISCV::NDS_BFOS) {
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index b4556f6..cfee6ab 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1851,6 +1851,11 @@ def TuneShortForwardBranchOpt
def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
+def TuneShortForwardBranchIMinMax
+    : SubtargetFeature<"short-forward-branch-i-minmax",
+                       "HasShortForwardBranchIMinMax", "true",
+                       "Enable short forward branch optimization for "
+                       "min/max instructions in Zbb",
+                       [TuneShortForwardBranchOpt]>;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9a6afa1..b25a054 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3995,6 +3995,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::ABSW:
case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_W_INX:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1c930ac..56881f7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -433,6 +433,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtP() ||
(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
setOperationAction(ISD::ABS, XLenVT, Legal);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
} else if (Subtarget.hasShortForwardBranchOpt()) {
// We can use PseudoCCSUB to implement ABS.
setOperationAction(ISD::ABS, XLenVT, Legal);
@@ -14816,8 +14818,16 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
+ if (Subtarget.hasStdExtP()) {
+ SDValue Src =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
+ return;
+ }
+
if (Subtarget.hasStdExtZbb()) {
- // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
+ // Emit a special node that will be expanded to NEGW+MAX at isel.
// This allows us to remember that the result is sign extended. Expanding
// to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
@@ -20290,6 +20300,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case RISCVISD::ABSW:
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
@@ -21862,6 +21873,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::REMUW:
case RISCVISD::ROLW:
case RISCVISD::RORW:
+ case RISCVISD::ABSW:
case RISCVISD::FCVT_W_RV64:
case RISCVISD::FCVT_WU_RV64:
case RISCVISD::STRICT_FCVT_W_RV64:
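
Semantics of the RISCVISD::ABSW node threaded through the changes above, as a scalar model: only the low 32 bits of the source are read and the result is sign-extended, which is what justifies the hasAllNBitUsers and ComputeNumSignBits entries. (Illustrative sketch, not DAG code.)

```cpp
#include <cstdint>

// ABSW: absolute value of the low 32 bits, sign-extended to 64 bits,
// equivalent to the NEGW+MAX expansion mentioned in the comment above.
int64_t abswModel(int64_t X) {
  int64_t W = static_cast<int32_t>(X); // only low 32 bits are read
  return W < 0 ? -W : W;               // always a sign-extended value
}
```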
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 912b82d..3a7013d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1699,6 +1699,10 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::AND: return RISCV::PseudoCCAND;
case RISCV::OR: return RISCV::PseudoCCOR;
case RISCV::XOR: return RISCV::PseudoCCXOR;
+ case RISCV::MAX: return RISCV::PseudoCCMAX;
+ case RISCV::MAXU: return RISCV::PseudoCCMAXU;
+ case RISCV::MIN: return RISCV::PseudoCCMIN;
+ case RISCV::MINU: return RISCV::PseudoCCMINU;
case RISCV::ADDI: return RISCV::PseudoCCADDI;
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
@@ -1735,7 +1739,8 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
const MachineRegisterInfo &MRI,
- const TargetInstrInfo *TII) {
+ const TargetInstrInfo *TII,
+ const RISCVSubtarget &STI) {
if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@@ -1743,6 +1748,12 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
+
+ if (!STI.hasShortForwardBranchIMinMax() &&
+ (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN ||
+ MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
+ return nullptr;
+
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
@@ -1806,10 +1817,10 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI =
- canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
+ canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI);
bool Invert = !DefMI;
if (!DefMI)
- DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
+ DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI);
if (!DefMI)
return nullptr;
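
What the new predicated min/max opcodes buy under short-forward-branch optimization: a select whose chosen value is min(a, b) can execute the MIN itself predicated on the branch condition, instead of computing it unconditionally and then doing a conditional move. Schematic semantics only (a sketch; the real pseudo also carries the comparison operands):

```cpp
#include <algorithm>
#include <cstdint>

// PseudoCCMIN, schematically: Dst = Cond ? min(A, B) : FalseVal
int64_t ccminModel(bool Cond, int64_t FalseVal, int64_t A, int64_t B) {
  return Cond ? std::min(A, B) : FalseVal;
}
```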
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index cc085bb..4cbbba3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1461,5 +1461,10 @@ let Predicates = [HasStdExtP, IsRV32] in {
// Codegen patterns
//===----------------------------------------------------------------------===//
+def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>;
+
let Predicates = [HasStdExtP] in
def : PatGpr<abs, ABS>;
+
+let Predicates = [HasStdExtP, IsRV64] in
+def : PatGpr<riscv_absw, ABSW>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 0114fbd..5a67a5a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -106,6 +106,10 @@ def PseudoCCSRA : SFBALU_rr;
def PseudoCCAND : SFBALU_rr;
def PseudoCCOR : SFBALU_rr;
def PseudoCCXOR : SFBALU_rr;
+def PseudoCCMAX : SFBALU_rr;
+def PseudoCCMIN : SFBALU_rr;
+def PseudoCCMAXU : SFBALU_rr;
+def PseudoCCMINU : SFBALU_rr;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index d08115b..ea98cdb 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -172,6 +172,7 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::ABSW:
case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_W_INX:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 624cff2..49beada 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48778,10 +48778,9 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
SDValue BC0 = peekThroughBitcasts(Op0);
if (BC0.getOpcode() == X86ISD::PCMPEQ &&
ISD::isBuildVectorAllZeros(BC0.getOperand(1).getNode())) {
- SDLoc DL(EFLAGS);
CC = (CC == X86::COND_B ? X86::COND_E : X86::COND_NE);
- SDValue X = DAG.getBitcast(OpVT, BC0.getOperand(0));
- return DAG.getNode(EFLAGS.getOpcode(), DL, VT, X, X);
+ SDValue X = DAG.getBitcast(OpVT, DAG.getFreeze(BC0.getOperand(0)));
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, X, X);
}
}
}
@@ -48837,7 +48836,7 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
MVT FloatVT =
MVT::getVectorVT(FloatSVT, OpVT.getSizeInBits() / EltBits);
- Res = DAG.getBitcast(FloatVT, Res);
+ Res = DAG.getBitcast(FloatVT, DAG.getFreeze(Res));
return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Res, Res);
} else if (EltBits == 16) {
MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
@@ -48856,8 +48855,30 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
}
// TESTZ(X,-1) == TESTZ(X,X)
- if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+ if (ISD::isBuildVectorAllOnes(Op1.getNode())) {
+ Op0 = DAG.getFreeze(Op0);
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+
+ // Attempt to convert PTESTZ(X,SIGNMASK) -> VTESTPD/PSZ(X,X) on AVX targets.
+ if (EFLAGS.getOpcode() == X86ISD::PTEST && Subtarget.hasAVX()) {
+ KnownBits KnownOp1 = DAG.computeKnownBits(Op1);
+ assert(KnownOp1.getBitWidth() == 64 &&
+ "Illegal PTEST vector element width");
+ if (KnownOp1.isConstant()) {
+ const APInt &Mask = KnownOp1.getConstant();
+ if (Mask.isSignMask()) {
+ MVT FpVT = MVT::getVectorVT(MVT::f64, OpVT.getSizeInBits() / 64);
+ Op0 = DAG.getBitcast(FpVT, DAG.getFreeze(Op0));
+ return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+ if (Mask.isSplat(32) && Mask.trunc(32).isSignMask()) {
+ MVT FpVT = MVT::getVectorVT(MVT::f32, OpVT.getSizeInBits() / 32);
+ Op0 = DAG.getBitcast(FpVT, DAG.getFreeze(Op0));
+ return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Op0, Op0);
+ }
+ }
+ }
// TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
// TODO: Add COND_NE handling?
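
Why the PTESTZ(X, SignMask) -> VTESTPD/VTESTPS(X, X) rewrite above is sound: PTEST sets ZF when (X & Mask) is all zero, and with a per-lane sign-bit mask that predicate is exactly "every lane's sign bit is clear", which is what TESTP computes on X directly. One-lane scalar model (illustrative):

```cpp
#include <cstdint>

// ZF contribution of a single 64-bit lane under each instruction:
bool ptestzSignLane(uint64_t X) { return (X & (1ull << 63)) == 0; }
bool testpdLane(uint64_t X) { return (X >> 63) == 0; } // same predicate
```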
@@ -53480,6 +53501,80 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Look for an RMW operation that only touches one bit of a larger-than-legal
+// type and fold it to a BTC/BTR/BTS pattern acting on a single i32 sub value.
+static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ using namespace SDPatternMatch;
+
+  // Only handle normal stores whose chain is a matching normal load.
+ auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
+ if (!ISD::isNormalStore(St) || !St->isSimple() || !Ld ||
+ !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
+ Ld->getBasePtr() != St->getBasePtr() ||
+ Ld->getOffset() != St->getOffset())
+ return SDValue();
+
+ SDValue LoadVal(Ld, 0);
+ SDValue StoredVal = St->getValue();
+ EVT VT = StoredVal.getValueType();
+
+ // Only narrow larger than legal scalar integers.
+ if (!VT.isScalarInteger() ||
+ VT.getSizeInBits() <= (Subtarget.is64Bit() ? 64 : 32))
+ return SDValue();
+
+ // BTR: X & ~(1 << ShAmt)
+ // BTS: X | (1 << ShAmt)
+ // BTC: X ^ (1 << ShAmt)
+ SDValue ShAmt;
+ if (!StoredVal.hasOneUse() ||
+ !(sd_match(StoredVal, m_And(m_Specific(LoadVal),
+ m_Not(m_Shl(m_One(), m_Value(ShAmt))))) ||
+ sd_match(StoredVal,
+ m_Or(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt)))) ||
+ sd_match(StoredVal,
+ m_Xor(m_Specific(LoadVal), m_Shl(m_One(), m_Value(ShAmt))))))
+ return SDValue();
+
+ // Ensure the shift amount is in bounds.
+ KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+ if (KnownAmt.getMaxValue().uge(VT.getSizeInBits()))
+ return SDValue();
+
+ // Split the shift into an alignment shift that moves the active i32 block to
+ // the bottom bits for truncation and a modulo shift that can act on the i32.
+ EVT AmtVT = ShAmt.getValueType();
+ SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getSignedConstant(-32LL, DL, AmtVT));
+ SDValue ModuloAmt =
+ DAG.getNode(ISD::AND, DL, AmtVT, ShAmt, DAG.getConstant(31, DL, AmtVT));
+
+ // Compute the byte offset for the i32 block that is changed by the RMW.
+ // combineTruncate will adjust the load for us in a similar way.
+ EVT PtrVT = St->getBasePtr().getValueType();
+ SDValue PtrBitOfs = DAG.getZExtOrTrunc(AlignAmt, DL, PtrVT);
+ SDValue PtrByteOfs = DAG.getNode(ISD::SRL, DL, PtrVT, PtrBitOfs,
+ DAG.getShiftAmountConstant(3, PtrVT, DL));
+ SDValue NewPtr = DAG.getMemBasePlusOffset(St->getBasePtr(), PtrByteOfs, DL,
+ SDNodeFlags::NoUnsignedWrap);
+
+ // Reconstruct the BTC/BTR/BTS pattern for the i32 block and store.
+ SDValue X = DAG.getNode(ISD::SRL, DL, VT, LoadVal, AlignAmt);
+ X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+
+ SDValue Mask =
+ DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+ DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+ if (StoredVal.getOpcode() == ISD::AND)
+ Mask = DAG.getNOT(DL, Mask, MVT::i32);
+
+ SDValue Res = DAG.getNode(StoredVal.getOpcode(), DL, MVT::i32, X, Mask);
+ return DAG.getStore(St->getChain(), DL, Res, NewPtr, St->getPointerInfo(),
+ Align(), St->getMemOperand()->getFlags());
+}
+
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -53706,6 +53801,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
+ return R;
+
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
// store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
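
Worked model of narrowBitOpRMW's arithmetic: a single-bit RMW on a wide little-endian integer only ever touches one aligned 32-bit block, at byte offset (idx & -32) / 8 with bit position idx & 31. (Sketch of the transform's address math on bytes in memory, not DAG code.)

```cpp
#include <cstdint>
#include <cstring>

// BTC-style update: flip bit Idx of a wide little-endian integer by
// rewriting just the one 32-bit block that contains it.
void xorBit(uint8_t *Mem, unsigned Idx) {
  unsigned ByteOfs = (Idx & ~31u) / 8; // AlignAmt >> 3
  uint32_t Blk;
  std::memcpy(&Blk, Mem + ByteOfs, sizeof(Blk));
  Blk ^= 1u << (Idx & 31u); // mask built from ModuloAmt
  std::memcpy(Mem + ByteOfs, &Blk, sizeof(Blk));
}
```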
@@ -54660,8 +54758,9 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
// truncation, see if we can convert the shift into a pointer offset instead.
// Limit this to normal (non-ext) scalar integer loads.
if (SrcVT.isScalarInteger() && Src.getOpcode() == ISD::SRL &&
- Src.hasOneUse() && Src.getOperand(0).hasOneUse() &&
- ISD::isNormalLoad(Src.getOperand(0).getNode())) {
+ Src.hasOneUse() && ISD::isNormalLoad(Src.getOperand(0).getNode()) &&
+ (Src.getOperand(0).hasOneUse() ||
+ !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, SrcVT))) {
auto *Ld = cast<LoadSDNode>(Src.getOperand(0));
if (Ld->isSimple() && VT.isByteSized() &&
isPowerOf2_64(VT.getSizeInBits())) {
@@ -56459,6 +56558,7 @@ static SDValue combineAVX512SetCCToKMOV(EVT VT, SDValue Op0, ISD::CondCode CC,
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ using namespace SDPatternMatch;
const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
const SDValue LHS = N->getOperand(0);
const SDValue RHS = N->getOperand(1);
@@ -56517,6 +56617,37 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
+ // If we're performing a bit test on a larger than legal type, attempt
+ // to (aligned) shift down the value to the bottom 32-bits and then
+ // perform the bittest on the i32 value.
+ // ICMP_ZERO(AND(X,SHL(1,IDX)))
+ // --> ICMP_ZERO(AND(TRUNC(SRL(X,AND(IDX,-32))),SHL(1,AND(IDX,31))))
+ if (isNullConstant(RHS) &&
+ OpVT.getScalarSizeInBits() > (Subtarget.is64Bit() ? 64 : 32)) {
+ SDValue X, ShAmt;
+ if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
+ m_Shl(m_One(), m_Value(ShAmt)))))) {
+ // Only attempt this if the shift amount is known to be in bounds.
+ KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
+ if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
+ EVT AmtVT = ShAmt.getValueType();
+ SDValue AlignAmt =
+ DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getSignedConstant(-32LL, DL, AmtVT));
+ SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+ DAG.getConstant(31, DL, AmtVT));
+ SDValue Mask = DAG.getNode(
+ ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
+ DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+ X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
+ X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+ X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
+ return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32),
+ CC);
+ }
+ }
+ }
+
// cmpeq(trunc(x),C) --> cmpeq(x,C)
// cmpne(trunc(x),C) --> cmpne(x,C)
// iff x upper bits are zero.
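
The setcc combine above narrows the same way: a bit test against zero on an oversized integer reduces to testing one aligned 32-bit word. Model with X viewed as little-endian 32-bit words (illustrative only):

```cpp
#include <cstdint>

// ICMP_ZERO(AND(X, SHL(1, Idx))) narrowed to an i32 test:
bool bitIsZero(const uint32_t *Words, unsigned Idx) {
  uint32_t Chunk = Words[Idx / 32]; // SRL by (Idx & -32), then TRUNCATE
  uint32_t Mask = 1u << (Idx % 32); // SHL(1, Idx & 31)
  return (Chunk & Mask) == 0;
}
```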