Diffstat (limited to 'llvm/lib/Target')
 llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp               |  5
 llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp           |  4
 llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp        |  4
 llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp               | 51
 llvm/lib/Target/LoongArch/LoongArchISelLowering.h                 |  1
 llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td               |  9
 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp     |  1
 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp |  7
 llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp                  |  2
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp                          | 69
 llvm/lib/Target/RISCV/RISCVInstrInfo.h                            |  2
 llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td                       |  4
 llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp                | 39
 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp                      |  5
 llvm/lib/Target/X86/X86ISelLowering.cpp                           | 10
 llvm/lib/Target/X86/X86ISelLowering.h                             |  3
 llvm/lib/Target/X86/X86InstrAVX512.td                             | 48
 llvm/lib/Target/X86/X86InstrArithmetic.td                         | 23
 llvm/lib/Target/X86/X86InstrFragmentsSIMD.td                      | 10
 19 files changed, 233 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 38718c4..7504f1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -150,7 +150,10 @@ public:
if (!CVisited.insert(CII).second)
continue;
- if (CII->getParent() == II->getParent() && !IsLookThru(II))
+ // The same-BB filter must classify the *user*, and non-lookthrough
+ // users must still be allowed when the def is a PHI (loop-header pattern).
+ if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
+ !isa<PHINode>(II))
continue;
if (isOpLegal(CII))
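
For readers following the control flow, a minimal sketch of the corrected predicate (an illustration, not the pass's API; assumed context: II is the def being scanned, CII one of its users, IsLookThru classifies pass-through instructions):

    // Sketch: returns true when the user should be skipped. The user CII,
    // not the def II, is classified by IsLookThru, and PHI defs are exempt
    // because a loop header keeps a PHI and its users in the same block.
    static bool shouldSkipUser(Instruction *II, Instruction *CII,
                               function_ref<bool(Instruction *)> IsLookThru) {
      return CII->getParent() == II->getParent() && !IsLookThru(CII) &&
             !isa<PHINode>(II);
    }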
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index d9bfeae..0a59132 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -2562,7 +2562,9 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
for (Function *F : NeedsPostProcess)
Splitter.processFunction(*F);
for (Function *F : Intrinsics) {
- if (isRemovablePointerIntrinsic(F->getIntrinsicID())) {
+ // use_empty() can also hold for intrinsics such as masked load, whose
+ // uses were rewritten out of the module earlier but were never erased.
+ if (F->use_empty() || isRemovablePointerIntrinsic(F->getIntrinsicID())) {
F->eraseFromParent();
} else {
std::optional<Function *> NewF = Intrinsic::remangleIntrinsicFunction(F);
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 5be4713..9b11201 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -957,8 +957,10 @@ void LoongArchAsmParser::emitLoadAddressAbs(MCInst &Inst, SMLoc IDLoc,
: Inst.getOperand(2).getExpr();
InstSeq Insts;
+ // To distinguish la.abs from an explicit %abs_hi20 operand, la.abs
+ // generates both R_LARCH_MARK_LA and R_LARCH_ABS_HI20 relocations.
Insts.push_back(
- LoongArchAsmParser::Inst(LoongArch::LU12I_W, ELF::R_LARCH_ABS_HI20));
+ LoongArchAsmParser::Inst(LoongArch::LU12I_W, ELF::R_LARCH_MARK_LA));
Insts.push_back(
LoongArchAsmParser::Inst(LoongArch::ORI, ELF::R_LARCH_ABS_LO12));
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 098bcfa..4cfbfca 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2319,6 +2319,53 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}
+/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
+static SDValue
+lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ // LoongArch LASX only supports xvinsve0.{w/d}.
+ if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
+ VT != MVT::v4f64)
+ return SDValue();
+
+ MVT GRLenVT = Subtarget.getGRLenVT();
+ int MaskSize = Mask.size();
+ assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ // Check if exactly one element of the Mask is replaced by 'Replaced', while
+ // all other elements are either 'Base + i' or undef (-1). On success, return
+ // the index of the replaced element. Otherwise, just return -1.
+ auto checkReplaceOne = [&](int Base, int Replaced) -> int {
+ int Idx = -1;
+ for (int i = 0; i < MaskSize; ++i) {
+ if (Mask[i] == Base + i || Mask[i] == -1)
+ continue;
+ if (Mask[i] != Replaced)
+ return -1;
+ if (Idx == -1)
+ Idx = i;
+ else
+ return -1;
+ }
+ return Idx;
+ };
+
+ // Case 1: the lowest element of V2 replaces one element in V1.
+ int Idx = checkReplaceOne(0, MaskSize);
+ if (Idx != -1)
+ return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
+ DAG.getConstant(Idx, DL, GRLenVT));
+
+ // Case 2: the lowest element of V1 replaces one element in V2.
+ Idx = checkReplaceOne(MaskSize, 0);
+ if (Idx != -1)
+ return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
+ DAG.getConstant(Idx, DL, GRLenVT));
+
+ return SDValue();
+}
+
/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
@@ -2595,6 +2642,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
Zeroable)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
Subtarget)))
return Result;
@@ -7453,6 +7503,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(XVPERM)
NODE_NAME_CASE(XVREPLVE0)
NODE_NAME_CASE(XVREPLVE0Q)
+ NODE_NAME_CASE(XVINSVE0)
NODE_NAME_CASE(VPICK_SEXT_ELT)
NODE_NAME_CASE(VPICK_ZEXT_ELT)
NODE_NAME_CASE(VREPLVE)
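
A standalone model of the mask scan above may help; this is a sketch assuming the usual shuffle-mask convention (entries 0..MaskSize-1 pick lanes of V1, MaskSize..2*MaskSize-1 pick lanes of V2, -1 is undef):

    #include <cassert>
    #include <vector>

    // Returns the index of the single lane whose mask entry equals
    // `Replaced` while every other lane is `Base + i` or undef (-1);
    // returns -1 otherwise. Mirrors the checkReplaceOne lambda above.
    static int checkReplaceOne(const std::vector<int> &Mask, int Base,
                               int Replaced) {
      int Idx = -1;
      for (int i = 0, e = (int)Mask.size(); i < e; ++i) {
        if (Mask[i] == Base + i || Mask[i] == -1)
          continue;
        if (Mask[i] != Replaced || Idx != -1)
          return -1;
        Idx = i;
      }
      return Idx;
    }

    int main() {
      // v8i32 shuffle <8,1,2,3,4,5,6,7>: lane 0 of V1 is replaced by the
      // lowest element of V2, so xvinsve0.w with index 0 applies.
      assert(checkReplaceOne({8, 1, 2, 3, 4, 5, 6, 7}, 0, 8) == 0);
      // Two replaced lanes: not an xvinsve0 pattern.
      assert(checkReplaceOne({8, 8, 2, 3, 4, 5, 6, 7}, 0, 8) == -1);
      return 0;
    }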
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9b60a9f..8a4d774 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -151,6 +151,7 @@ enum NodeType : unsigned {
XVPERM,
XVREPLVE0,
XVREPLVE0Q,
+ XVINSVE0,
// Extended vector element extraction
VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index bbc0489..5143d53 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -20,6 +20,7 @@ def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>;
def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>;
def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>;
+def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>;
def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1708,6 +1709,14 @@ def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm),
(XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>;
// XVINSVE0_{W/D}
+def : Pat<(loongarch_xvinsve0 v8i32:$xd, v8i32:$xj, uimm3:$imm),
+ (XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4i64:$xd, v4i64:$xj, uimm2:$imm),
+ (XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
+def : Pat<(loongarch_xvinsve0 v8f32:$xd, v8f32:$xj, uimm3:$imm),
+ (XVINSVE0_W v8f32:$xd, v8f32:$xj, uimm3:$imm)>;
+def : Pat<(loongarch_xvinsve0 v4f64:$xd, v4f64:$xj, uimm2:$imm),
+ (XVINSVE0_D v4f64:$xd, v4f64:$xj, uimm2:$imm)>;
def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
(XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32),
uimm3:$imm)>;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
index 0d77617..8ecb62d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
@@ -32,6 +32,7 @@ static StringRef getLoongArchSpecifierName(uint16_t S) {
return "b16";
case ELF::R_LARCH_B21:
return "b21";
+ case ELF::R_LARCH_MARK_LA:
case ELF::R_LARCH_ABS_HI20:
return "abs_hi20";
case ELF::R_LARCH_ABS_LO12:
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index b7ead5e..f0e2bc4 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -161,6 +161,13 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
case ELF::R_LARCH_B26:
FixupKind = LoongArch::fixup_loongarch_b26;
break;
+ case ELF::R_LARCH_MARK_LA:
+ // Match gas behavior: generate `R_LARCH_MARK_LA` relocation when using
+ // `la.abs`.
+ Fixups.push_back(
+ MCFixup::create(0, MCConstantExpr::create(0, Ctx),
+ FirstLiteralRelocationKind + ELF::R_LARCH_MARK_LA));
+ [[fallthrough]];
case ELF::R_LARCH_ABS_HI20:
FixupKind = LoongArch::fixup_loongarch_abs_hi20;
break;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index cb57c43..d4d9e54 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -193,7 +193,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
// we need to invert the branch condition to jump over TrueBB when the
// condition is false.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
// Insert branch instruction.
BuildMI(MBB, MBBI, DL, TII->get(RISCVCC::getBrCond(CC)))
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 56db09a..70b6c7e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1023,6 +1023,37 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
Cond.push_back(LastInst.getOperand(1));
}
+static unsigned getInverseXqcicmOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ case RISCV::QC_MVEQ:
+ return RISCV::QC_MVNE;
+ case RISCV::QC_MVNE:
+ return RISCV::QC_MVEQ;
+ case RISCV::QC_MVLT:
+ return RISCV::QC_MVGE;
+ case RISCV::QC_MVGE:
+ return RISCV::QC_MVLT;
+ case RISCV::QC_MVLTU:
+ return RISCV::QC_MVGEU;
+ case RISCV::QC_MVGEU:
+ return RISCV::QC_MVLTU;
+ case RISCV::QC_MVEQI:
+ return RISCV::QC_MVNEI;
+ case RISCV::QC_MVNEI:
+ return RISCV::QC_MVEQI;
+ case RISCV::QC_MVLTI:
+ return RISCV::QC_MVGEI;
+ case RISCV::QC_MVGEI:
+ return RISCV::QC_MVLTI;
+ case RISCV::QC_MVLTUI:
+ return RISCV::QC_MVGEUI;
+ case RISCV::QC_MVGEUI:
+ return RISCV::QC_MVLTUI;
+ }
+}
+
unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
switch (SelectOpc) {
default:
@@ -1134,7 +1165,7 @@ unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, unsigned SelectOpc) {
}
}
-RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
+RISCVCC::CondCode RISCVCC::getInverseBranchCondition(RISCVCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unrecognized conditional branch");
@@ -1554,7 +1585,7 @@ bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
return Register();
};
- unsigned NewOpc = RISCVCC::getBrCond(getOppositeBranchCondition(CC));
+ unsigned NewOpc = RISCVCC::getBrCond(getInverseBranchCondition(CC));
// Might be case 1.
// Don't change 0 to 1 since we can use x0.
@@ -1801,7 +1832,7 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
// Add condition code, inverting if necessary.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
if (Invert)
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
NewMI.addImm(CC);
// Copy the false register.
@@ -3762,6 +3793,19 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
// Operands 1 and 2 are commutable, if we switch the opcode.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
+ case RISCV::QC_MVEQ:
+ case RISCV::QC_MVNE:
+ case RISCV::QC_MVLT:
+ case RISCV::QC_MVGE:
+ case RISCV::QC_MVLTU:
+ case RISCV::QC_MVGEU:
+ case RISCV::QC_MVEQI:
+ case RISCV::QC_MVNEI:
+ case RISCV::QC_MVLTI:
+ case RISCV::QC_MVGEI:
+ case RISCV::QC_MVLTUI:
+ case RISCV::QC_MVGEUI:
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 4);
case RISCV::TH_MULA:
case RISCV::TH_MULAW:
case RISCV::TH_MULAH:
@@ -3974,11 +4018,28 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
OpIdx2);
}
+ case RISCV::QC_MVEQ:
+ case RISCV::QC_MVNE:
+ case RISCV::QC_MVLT:
+ case RISCV::QC_MVGE:
+ case RISCV::QC_MVLTU:
+ case RISCV::QC_MVGEU:
+ case RISCV::QC_MVEQI:
+ case RISCV::QC_MVNEI:
+ case RISCV::QC_MVLTI:
+ case RISCV::QC_MVGEI:
+ case RISCV::QC_MVLTUI:
+ case RISCV::QC_MVGEUI: {
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(getInverseXqcicmOpcode(MI.getOpcode())));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
+ OpIdx2);
+ }
case RISCV::PseudoCCMOVGPRNoX0:
case RISCV::PseudoCCMOVGPR: {
// CCMOV can be commuted by inverting the condition.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
- CC = RISCVCC::getOppositeBranchCondition(CC);
+ CC = RISCVCC::getInverseBranchCondition(CC);
auto &WorkingMI = cloneIfNew(MI);
WorkingMI.getOperand(3).setImm(CC);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
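
Commuting operand 1 (the tied previous value) with operand 4 (the conditionally moved value) is sound only together with the opcode flip from getInverseXqcicmOpcode, by the ternary identity cond ? a : b == !cond ? b : a. A self-contained check (the helper names model the Xqcicm semantics and are illustrative, not LLVM APIs):

    #include <cassert>
    #include <cstdint>

    // Models of qc.mveq / qc.mvne: rd = (rs1 cmp rs2) ? rs3 : rd.
    static int64_t mveq(int64_t rd, int64_t rs1, int64_t rs2, int64_t rs3) {
      return rs1 == rs2 ? rs3 : rd;
    }
    static int64_t mvne(int64_t rd, int64_t rs1, int64_t rs2, int64_t rs3) {
      return rs1 != rs2 ? rs3 : rd;
    }

    int main() {
      for (int64_t a : {0, 1})
        for (int64_t b : {0, 1})
          // Swapping the tied value and rs3 while flipping the opcode
          // preserves the result for every condition outcome.
          assert(mveq(7, a, b, 9) == mvne(9, a, b, 7));
      return 0;
    }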
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 2bc499b..42a0c4c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -44,7 +44,7 @@ enum CondCode {
COND_INVALID
};
-CondCode getOppositeBranchCondition(CondCode);
+CondCode getInverseBranchCondition(CondCode);
unsigned getBrCond(CondCode CC, unsigned SelectOpc = 0);
} // end of namespace RISCVCC
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 13b02d1..ff4a040 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -604,7 +604,7 @@ class QCILICC<bits<3> funct3, bits<2> funct2, DAGOperand InTyRs2, string opcodes
let Inst{31-25} = {simm, funct2};
}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
class QCIMVCC<bits<3> funct3, string opcodestr>
: RVInstR4<0b00, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb),
(ins GPRNoX0:$rd, GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs3),
@@ -612,7 +612,7 @@ class QCIMVCC<bits<3> funct3, string opcodestr>
let Constraints = "$rd = $rd_wb";
}
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
class QCIMVCCI<bits<3> funct3, string opcodestr, DAGOperand immType>
: RVInstR4<0b10, funct3, OPC_CUSTOM_2, (outs GPRNoX0:$rd_wb),
(ins GPRNoX0:$rd, GPRNoX0:$rs1, immType:$imm, GPRNoX0:$rs3),
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 563f3bb..d4124ae 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -167,6 +167,42 @@ static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
return false;
}
+// If this i64 AND is part of ((X & -(1 << C1) & 0xffffffff) == C2 << C1),
+// DAGCombiner can convert this to (sraiw X, C1) == sext(C2) for RV64. On RV32,
+// the type will be split so only the lower 32 bits need to be compared using
+// (srai/srli X, C) == C2.
+static bool canUseShiftCmp(Instruction *Inst, const APInt &Imm) {
+ if (!Inst->hasOneUse())
+ return false;
+
+ // Look for equality comparison.
+ auto *Cmp = dyn_cast<ICmpInst>(*Inst->user_begin());
+ if (!Cmp || !Cmp->isEquality())
+ return false;
+
+ // Right hand side of comparison should be a constant.
+ auto *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
+ if (!C)
+ return false;
+
+ uint64_t Mask = Imm.getZExtValue();
+
+ // Mask should be of the form -(1 << C) in the lower 32 bits.
+ if (!isUInt<32>(Mask) || !isPowerOf2_32(-uint32_t(Mask)))
+ return false;
+
+ // Comparison constant should be a subset of Mask.
+ uint64_t CmpC = C->getZExtValue();
+ if ((CmpC & Mask) != CmpC)
+ return false;
+
+ // We'll need to sign extend the comparison constant and shift it right. Make
+ // sure the new constant can use addi/xori+seqz/snez.
+ unsigned ShiftBits = llvm::countr_zero(Mask);
+ int64_t NewCmpC = SignExtend64<32>(CmpC) >> ShiftBits;
+ return NewCmpC >= -2048 && NewCmpC <= 2048;
+}
+
InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
@@ -224,6 +260,9 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
canUseShiftPair(Inst, Imm))
return TTI::TCC_Free;
+ if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
+ canUseShiftCmp(Inst, Imm))
+ return TTI::TCC_Free;
Takes12BitImm = true;
break;
case Instruction::Add:
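
A self-contained model of the new check, usable to sanity-test the constant arithmetic (a sketch: plain uint64_t stands in for APInt/ConstantInt, and C++20 <bit> replaces LLVM's bit helpers):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    static bool canUseShiftCmpModel(uint64_t Mask, uint64_t CmpC) {
      // Mask must be -(1 << C) in the lower 32 bits.
      if ((Mask >> 32) || !std::has_single_bit(uint32_t(0) - uint32_t(Mask)))
        return false;
      // The comparison constant must be a subset of the mask.
      if ((CmpC & Mask) != CmpC)
        return false;
      // Sign-extend from 32 bits, then shift right by the mask's trailing
      // zero count; the result must be usable by addi/xori+seqz/snez.
      unsigned ShiftBits = std::countr_zero(uint32_t(Mask));
      int64_t NewCmpC = int64_t(int32_t(uint32_t(CmpC))) >> ShiftBits;
      return NewCmpC >= -2048 && NewCmpC <= 2048;
    }

    int main() {
      // (X & 0xfffff000) == 0x5000  ->  (sraiw X, 12) == 5: mask is free.
      assert(canUseShiftCmpModel(0xfffff000u, 0x5000u));
      // 0xfffff00f is not -(1 << C): the AND immediate is not free here.
      assert(!canUseShiftCmpModel(0xfffff00fu, 0x5000u));
      return 0;
    }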
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index b4fc8da..db85e33 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -587,7 +587,8 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass(
}
if (FPClassTest PartialCheck = Mask & fcNan) {
- auto InfWithQnanBitC = buildSPIRVConstant(IntTy, Inf | QNaNBitMask);
+ auto InfWithQnanBitC =
+ buildSPIRVConstant(IntTy, std::move(Inf) | QNaNBitMask);
if (PartialCheck == fcNan) {
// isnan(V) ==> abs(V) u> int(inf)
appendToRes(
@@ -613,7 +614,7 @@ bool SPIRVLegalizerInfo::legalizeIsFPClass(
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
auto ExpMinusOne = assignSPIRVTy(
MIRBuilder.buildSub(IntTy, Abs, buildSPIRVConstant(IntTy, ExpLSB)));
- APInt MaxExpMinusOne = ExpMask - ExpLSB;
+ APInt MaxExpMinusOne = std::move(ExpMask) - ExpLSB;
auto NormalRes = assignSPIRVTy(
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
buildSPIRVConstant(IntTy, MaxExpMinusOne)));
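
The two std::move calls above are profitable because APInt's binary operators take their left operand by value, so moving a last-use APInt lets the temporary steal its heap storage instead of copying it (relevant for bit widths above 64). A minimal sketch, assuming LLVM's ADT headers:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // operator| is declared as operator|(APInt a, const APInt &b), so a
    // moved-in left operand is constructed by move rather than by copy.
    APInt orLastUse(APInt Inf, const APInt &QNaNBitMask) {
      return std::move(Inf) | QNaNBitMask; // reuses Inf's allocation
    }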
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e7eb67a..cd04ff5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31215,16 +31215,16 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
unsigned NumElts = VT.getVectorNumElements();
if (Subtarget.hasVBMI2() && EltSizeInBits > 8) {
- if (IsFSHR)
- std::swap(Op0, Op1);
if (IsCstSplat) {
+ if (IsFSHR)
+ std::swap(Op0, Op1);
uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
{Op0, Op1, Imm}, DAG, Subtarget);
}
- return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ return getAVX512Node(IsFSHR ? ISD::FSHR : ISD::FSHL, DL, VT,
{Op0, Op1, Amt}, DAG, Subtarget);
}
assert((VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8 ||
@@ -35139,8 +35139,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VALIGN)
NODE_NAME_CASE(VSHLD)
NODE_NAME_CASE(VSHRD)
- NODE_NAME_CASE(VSHLDV)
- NODE_NAME_CASE(VSHRDV)
NODE_NAME_CASE(PSHUFD)
NODE_NAME_CASE(PSHUFHW)
NODE_NAME_CASE(PSHUFLW)
@@ -45171,6 +45169,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
return true;
+ case X86ISD::INSERTPS:
case X86ISD::BLENDI:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
@@ -45241,6 +45240,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
case X86ISD::BLENDV:
return false;
// SSE target shuffles.
+ case X86ISD::INSERTPS:
case X86ISD::PSHUFB:
case X86ISD::PSHUFD:
case X86ISD::UNPCKL:
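
Routing VPSHLDV/VPSHRDV through the generic ISD::FSHL/FSHR nodes is sound because the instructions implement exactly the generic funnel-shift semantics per lane, with the amount reduced modulo the lane width. A scalar reference model for 16-bit lanes (a sketch, not the DAG code):

    #include <cassert>
    #include <cstdint>

    // fshl(a, b, s): shift the concatenation a:b left by s % 16 and keep
    // the high half; fshr(a, b, s): shift it right and keep the low half.
    static uint16_t fshl16(uint16_t a, uint16_t b, unsigned s) {
      s %= 16;
      return s ? uint16_t((a << s) | (b >> (16 - s))) : a;
    }
    static uint16_t fshr16(uint16_t a, uint16_t b, unsigned s) {
      s %= 16;
      return s ? uint16_t((b >> s) | (a << (16 - s))) : b;
    }

    int main() {
      assert(fshl16(0x00ff, 0xff00, 4) == 0x0fff);
      assert(fshr16(0x00ff, 0xff00, 4) == 0xfff0);
      return 0;
    }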
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 8ab8c66..b55556a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -471,8 +471,7 @@ namespace llvm {
// VBMI2 Concat & Shift.
VSHLD,
VSHRD,
- VSHLDV,
- VSHRDV,
+
// Shuffle Packed Values at 128-bit granularity.
SHUF128,
MOVDDUP,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2371ed4..564810c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12300,72 +12300,76 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
// VBMI2
//===----------------------------------------------------------------------===//
-multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in {
defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
- (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
+ !if(SwapLR,
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src3))),
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src3))))>,
T8, PD, EVEX, VVVV, Sched<[sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
- (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (VTI.LdFrag addr:$src3))))>,
+ !if(SwapLR,
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.LdFrag addr:$src3)))),
+ (VTI.VT (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.LdFrag addr:$src3)))))>,
T8, PD, EVEX, VVVV,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
-multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
- : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
+ : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched, VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
"${src3}"#VTI.BroadcastStr#", $src2",
"$src2, ${src3}"#VTI.BroadcastStr,
- (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
+ !if(SwapLR,
+ (OpNode (VTI.VT VTI.RC:$src2), (VTI.VT VTI.RC:$src1), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))),
+ (OpNode (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2), (VTI.VT (VTI.BroadcastLdFrag addr:$src3))))>,
T8, PD, EVEX, VVVV, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
-multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+ defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+ defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
EVEX_V256;
- defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+ defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
-multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
+multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, bit SwapLR,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
+ defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.ZMM, VTI.info512>,
EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
+ defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.YMM, VTI.info256>,
EVEX_V256;
- defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
+ defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, SwapLR, sched.XMM, VTI.info128>,
EVEX_V128;
}
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
- SDNode OpNode, X86SchedWriteWidths sched> {
- defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
+ SDNode OpNode, bit SwapLR, X86SchedWriteWidths sched> {
+ defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, SwapLR, sched,
avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
- defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
+ defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, SwapLR, sched,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
- defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
+ defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, SwapLR, sched,
avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
}
@@ -12381,8 +12385,8 @@ multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
}
// Concat & Shift
-defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
-defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
+defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", fshl, 0, SchedWriteVecIMul>;
+defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", fshr, 1, SchedWriteVecIMul>;
defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
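
The new SwapLR bit encodes which fshl/fshr operand the tied first source corresponds to. Reading the patterns above with $src1 tied to $dst gives the mapping below, written out as a comment for reference:

    // $dst is tied to $src1 in both instructions:
    //   vpshldv dst, s2, s3   <=>  dst = fshl(dst, s2, s3)   (SwapLR = 0)
    //   vpshrdv dst, s2, s3   <=>  dst = fshr(s2, dst, s3)   (SwapLR = 1)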
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index b4768590..031fdc1 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -25,18 +25,12 @@ let SchedRW = [WriteLEA] in {
[(set GR32:$dst, lea32addr:$src)]>,
OpSize32, Requires<[Not64BitMode]>;
- let Predicates = [HasNDD], isCodeGenOnly = 1 in {
- def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR8:$dst), (ins lea64_8mem:$src),
- "lea{b}\t{$src|$dst}, {$dst|$src}",
- [(set GR8:$dst, lea64_iaddr:$src)]>,
- OpSize16,
- Requires<[In64BitMode]>;
-
- def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins lea64_16mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}",
- [(set GR16:$dst, lea64_iaddr:$src)]>,
- OpSize16,
- Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in {
+ def LEA64_8r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_8mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
+
+ def LEA64_16r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_16mem:$src),
+ "lea{l}\t{$src|$dst}, {$dst|$src}", []>, OpSize32;
}
def LEA64_32r : I<0x8D, MRMSrcMem, (outs GR32:$dst), (ins lea64_32mem:$src),
@@ -51,6 +45,11 @@ let SchedRW = [WriteLEA] in {
[(set GR64:$dst, lea64addr:$src)]>;
} // SchedRW
+let Predicates = [HasNDD] in {
+ def : Pat<(i8 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_8r lea64_8mem:$src), sub_8bit)>;
+ def : Pat<(i16 lea64_iaddr:$src), (EXTRACT_SUBREG (LEA64_16r lea64_16mem:$src), sub_16bit)>;
+}
+
// Pseudo instruction for lea that prevent optimizer from eliminating
// the instruction.
let SchedRW = [WriteLEA], isPseudo = true, hasSideEffects = 1 in {
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0c20ffe..5321ecf 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -406,16 +406,6 @@ def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
-def X86VShldv : SDNode<"X86ISD::VSHLDV",
- SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>>;
-def X86VShrdv : SDNode<"X86ISD::VSHRDV",
- SDTypeProfile<1, 3, [SDTCisVec<0>,
- SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>>;
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;