Diffstat (limited to 'llvm/lib/Target/X86')
 llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp       | 15
 llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp  |  4
 llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp |  2
 llvm/lib/Target/X86/X86ISelLowering.cpp                  | 72
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp              | 47
 5 files changed, 103 insertions, 37 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 1efef83..56a4cc3 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -174,8 +174,7 @@ public:
   std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
                                     uint64_t &) override;
   void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
-                  MutableArrayRef<char> Data, uint64_t Value,
-                  bool IsResolved) override;
+                  uint8_t *Data, uint64_t Value, bool IsResolved) override;

   bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
                          const MCSubtargetInfo &STI) const override;
@@ -512,9 +511,8 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                             isFirstMacroFusibleInst(Inst, *MCII))) {
     // If we meet a unfused branch or the first instuction in a fusiable pair,
     // insert a BoundaryAlign fragment.
-    PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
-        AlignBoundary, STI);
-    OS.insert(PendingBA);
+    PendingBA =
+        OS.newSpecialFragment<MCBoundaryAlignFragment>(AlignBoundary, STI);
   }
 }

@@ -676,9 +674,8 @@ std::optional<bool> X86AsmBackend::evaluateFixup(const MCFragment &,
 }

 void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
-                               const MCValue &Target,
-                               MutableArrayRef<char> Data, uint64_t Value,
-                               bool IsResolved) {
+                               const MCValue &Target, uint8_t *Data,
+                               uint64_t Value, bool IsResolved) {
   // Force relocation when there is a specifier. This might be too conservative
   // - GAS doesn't emit a relocation for call local@plt; local:.
   if (Target.getSpecifier())
@@ -710,7 +707,7 @@ void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
   }

   for (unsigned i = 0; i != Size; ++i)
-    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
+    Data[i] = uint8_t(Value >> (i * 8));
 }

 bool X86AsmBackend::mayNeedRelaxation(unsigned Opcode,
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 3323b38..ea0abdd 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -349,8 +349,8 @@ unsigned X86ELFObjectWriter::getRelocType(const MCFixup &Fixup,
   case X86::S_TLSLDM:
   case X86::S_TPOFF:
   case X86::S_DTPOFF:
-    if (auto *S = Target.getAddSym())
-      cast<MCSymbolELF>(S)->setType(ELF::STT_TLS);
+    if (auto *S = const_cast<MCSymbol *>(Target.getAddSym()))
+      static_cast<MCSymbolELF *>(S)->setType(ELF::STT_TLS);
     break;
   default:
     break;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index b8e117b..ff27005 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -369,7 +369,7 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   if (Op.isReg()) {
     printRegName(O, Op.getReg());
   } else if (Op.isImm()) {
-    markup(O, Markup::Immediate) << formatImm((int64_t)Op.getImm());
+    markup(O, Markup::Immediate) << formatImm(Op.getImm());
   } else {
     assert(Op.isExpr() && "unknown operand kind in printOperand");
     O << "offset ";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bbbb1d9..f366094 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8279,8 +8279,8 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
 static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
                              const X86Subtarget &Subtarget, SelectionDAG &DAG,
                              SDValue &Opnd0, SDValue &Opnd1,
-                             unsigned &NumExtracts,
-                             bool &IsSubAdd) {
+                             unsigned &NumExtracts, bool &IsSubAdd,
+                             bool &HasAllowContract) {
   using namespace SDPatternMatch;
   MVT VT = BV->getSimpleValueType(0);

@@ -8292,6 +8292,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
   SDValue InVec1 = DAG.getUNDEF(VT);

   NumExtracts = 0;
+  HasAllowContract = NumElts != 0;

   // Odd-numbered elements in the input build vector are obtained from
   // adding/subtracting two integer/float elements.
@@ -8350,6 +8351,7 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,

     // Increment the number of extractions done.
     ++NumExtracts;
+    HasAllowContract &= Op->getFlags().hasAllowContract();
   }

   // Ensure we have found an opcode for both parities and that they are
@@ -8393,9 +8395,10 @@ static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
 /// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
 /// FMADDSUB is.
 static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
-                                 SelectionDAG &DAG,
-                                 SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2,
-                                 unsigned ExpectedUses) {
+                                 SelectionDAG &DAG, SDValue &Opnd0,
+                                 SDValue &Opnd1, SDValue &Opnd2,
+                                 unsigned ExpectedUses,
+                                 bool AllowSubAddOrAddSubContract) {
   if (Opnd0.getOpcode() != ISD::FMUL ||
       !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA())
     return false;
@@ -8406,7 +8409,8 @@ static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget,
   // or MUL + ADDSUB to FMADDSUB.
   const TargetOptions &Options = DAG.getTarget().Options;
   bool AllowFusion =
-      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
+      Options.AllowFPOpFusion == FPOpFusion::Fast ||
+      (AllowSubAddOrAddSubContract && Opnd0->getFlags().hasAllowContract());
   if (!AllowFusion)
     return false;

@@ -8427,15 +8431,17 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
   SDValue Opnd0, Opnd1;
   unsigned NumExtracts;
   bool IsSubAdd;
-  if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts,
-                        IsSubAdd))
+  bool HasAllowContract;
+  if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, IsSubAdd,
+                        HasAllowContract))
     return SDValue();

   MVT VT = BV->getSimpleValueType(0);

   // Try to generate X86ISD::FMADDSUB node here.
   SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) {
+  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts,
+                           HasAllowContract)) {
     unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
     return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
   }
@@ -9132,11 +9138,17 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
                                    SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
   SDValue SrcVec, IndicesVec;
+
+  auto PeekThroughFreeze = [](SDValue N) {
+    if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
+      return N->getOperand(0);
+    return N;
+  };
   // Check for a match of the permute source vector and permute index elements.
   // This is done by checking that the i-th build_vector operand is of the form:
   // (extract_elt SrcVec, (extract_elt IndicesVec, i)).
   for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
-    SDValue Op = V.getOperand(Idx);
+    SDValue Op = PeekThroughFreeze(V.getOperand(Idx));
     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
       return SDValue();

@@ -23486,7 +23498,6 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
   }

   // Try to shrink i64 compares if the input has enough zero bits.
-  // TODO: Add sign-bits equivalent for isX86CCSigned(X86CC)?
   if (CmpVT == MVT::i64 && !isX86CCSigned(X86CC) &&
       Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
       DAG.MaskedValueIsZero(Op1, APInt::getHighBitsSet(64, 32)) &&
@@ -23496,6 +23507,16 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, X86::CondCode X86CC,
     Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
   }

+  // Try to shrink all i64 compares if the inputs are representable as signed
+  // i32.
+  if (CmpVT == MVT::i64 &&
+      Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
+      DAG.ComputeNumSignBits(Op1) > 32 && DAG.ComputeNumSignBits(Op0) > 32) {
+    CmpVT = MVT::i32;
+    Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
+    Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
+  }
+
   // 0-x == y --> x+y == 0
   // 0-x != y --> x+y != 0
   if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) &&
@@ -43165,7 +43186,7 @@ static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) {
 /// the fact that they're unused.
 static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
                              SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
-                             bool &IsSubAdd) {
+                             bool &IsSubAdd, bool &HasAllowContract) {
   EVT VT = N->getValueType(0);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();

@@ -43216,6 +43237,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
   // It's a subadd if the vector in the even parity is an FADD.
   IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
                      : V2->getOpcode() == ISD::FADD;
+  HasAllowContract =
+      V1->getFlags().hasAllowContract() && V2->getFlags().hasAllowContract();

   Opnd0 = LHS;
   Opnd1 = RHS;
@@ -43273,14 +43296,17 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const SDLoc &DL,

   SDValue Opnd0, Opnd1;
   bool IsSubAdd;
-  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd))
+  bool HasAllowContract;
+  if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd,
+                        HasAllowContract))
     return SDValue();

   MVT VT = N->getSimpleValueType(0);

   // Try to generate X86ISD::FMADDSUB node here.
   SDValue Opnd2;
-  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) {
+  if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2,
+                           HasAllowContract)) {
     unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB;
     return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2);
   }
@@ -54220,7 +54246,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
 }

 // Try to form a MULHU or MULHS node by looking for
-// (trunc (srl (mul ext, ext), 16))
+// (trunc (srl (mul ext, ext), >= 16))
 // TODO: This is X86 specific because we want to be able to handle wide types
 // before type legalization. But we can only do it if the vector will be
 // legalized via widening/splitting. Type legalization can't handle promotion
@@ -54245,10 +54271,16 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,

   // First instruction should be a right shift by 16 of a multiply.
   SDValue LHS, RHS;
+  APInt ShiftAmt;
   if (!sd_match(Src,
-                m_Srl(m_Mul(m_Value(LHS), m_Value(RHS)), m_SpecificInt(16))))
+                m_Srl(m_Mul(m_Value(LHS), m_Value(RHS)), m_ConstInt(ShiftAmt))))
     return SDValue();

+  if (ShiftAmt.ult(16) || ShiftAmt.uge(InVT.getScalarSizeInBits()))
+    return SDValue();
+
+  uint64_t AdditionalShift = ShiftAmt.getZExtValue() - 16;
+
   // Count leading sign/zero bits on both inputs - if there are enough then
   // truncation back to vXi16 will be cheap - either as a pack/shuffle
   // sequence or using AVX512 truncations. If the inputs are sext/zext then the
@@ -54286,7 +54318,9 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
                                   InVT.getSizeInBits() / 16);
     SDValue Res = DAG.getNode(ISD::MULHU, DL, BCVT, DAG.getBitcast(BCVT, LHS),
                               DAG.getBitcast(BCVT, RHS));
-    return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));
+    Res = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));
+    return DAG.getNode(ISD::SRL, DL, VT, Res,
+                       DAG.getShiftAmountConstant(AdditionalShift, VT, DL));
   }

   // Truncate back to source type.
@@ -54294,7 +54328,9 @@
   RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);

   unsigned Opc = IsSigned ? ISD::MULHS : ISD::MULHU;
-  return DAG.getNode(Opc, DL, VT, LHS, RHS);
+  SDValue Res = DAG.getNode(Opc, DL, VT, LHS, RHS);
+  return DAG.getNode(ISD::SRL, DL, VT, Res,
+                     DAG.getShiftAmountConstant(AdditionalShift, VT, DL));
 }

 // Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 5862c7e..7c594d0 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -2781,6 +2781,38 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   return Bytes == MFI.getObjectSize(FI);
 }

+static bool
+mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI,
+                            Register CallerSRetReg) {
+  const auto &Outs = CLI.Outs;
+  const auto &OutVals = CLI.OutVals;
+
+  // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
+  // operand index within the callee that may have a sret pointer too.
+  unsigned Pos = 0;
+  for (unsigned E = Outs.size(); Pos != E; ++Pos)
+    if (Outs[Pos].Flags.isSRet())
+      break;
+  // Bail out if the callee has not any sret argument.
+  if (Pos == Outs.size())
+    return false;
+
+  // At this point, either the caller is forwarding its sret argument to the
+  // callee, or the callee is being passed a different sret pointer. We now look
+  // for a CopyToReg, where the callee sret argument is written into a new vreg
+  // (which should later be %rax/%eax, if this is returned).
+  SDValue SRetArgVal = OutVals[Pos];
+  for (SDNode *User : SRetArgVal->users()) {
+    if (User->getOpcode() != ISD::CopyToReg)
+      continue;
+    Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+    if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
+      return true;
+  }
+
+  return false;
+}
+
 /// Check whether the call is eligible for tail call optimization. Targets
 /// that want to do tail call optimization should implement this function.
 /// Note that the x86 backend does not check musttail calls for eligibility! The
@@ -2802,6 +2834,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(

   // If -tailcallopt is specified, make fastcc functions tail-callable.
   MachineFunction &MF = DAG.getMachineFunction();
+  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
   const Function &CallerF = MF.getFunction();

   // If the function return type is x86_fp80 and the callee return type is not,
@@ -2838,14 +2871,15 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
   if (RegInfo->hasStackRealignment(MF))
     return false;

-  // Also avoid sibcall optimization if we're an sret return fn and the callee
-  // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
-  // insufficient.
-  if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+  // Avoid sibcall optimization if we are an sret return function and the
+  // callee is incompatible, unless such premises are proven wrong. See comment
+  // in LowerReturn about why hasStructRetAttr is insufficient.
+  if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
     // For a compatible tail call the callee must return our sret pointer. So it
     // needs to be (a) an sret function itself and (b) we pass our sret as its
     // sret. Condition #b is harder to determine.
-    return false;
+    if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
+      return false;
   } else if (IsCalleePopSRet)
     // The callee pops an sret, so we cannot tail-call, as our caller doesn't
     // expect that.
@@ -2967,8 +3001,7 @@
       X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                        MF.getTarget().Options.GuaranteedTailCallOpt);

-  if (unsigned BytesToPop =
-          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
+  if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
     // If we have bytes to pop, the callee must pop them.
     bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
     if (!CalleePopMatches)