diff options
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 56 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 25 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 9 |
5 files changed, 74 insertions, 25 deletions
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 6261fad..706ab2b 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -160,6 +160,14 @@ FunctionPass *createX86PartialReductionPass(); /// // Analyzes and emits pseudos to support Win x64 Unwind V2. FunctionPass *createX86WinEHUnwindV2Pass(); +/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics +/// or split the data to two <128 x i32>. +FunctionPass *createX86LowerAMXTypePass(); + +/// The pass transforms amx intrinsics to scalar operation if the function has +/// optnone attribute or it is O0. +FunctionPass *createX86LowerAMXIntrinsicsPass(); + InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &, const X86RegisterBankInfo &); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 62073ec..4393f6e 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { if (!(Subtarget->hasVLX() || NVT.is512BitVector())) return false; - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - auto getFoldableLogicOp = [](SDValue Op) { // Peek through single use bitcast. if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) @@ -4740,13 +4737,47 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { return SDValue(); }; - SDValue A, FoldableOp; - if ((FoldableOp = getFoldableLogicOp(N1))) { - A = N0; - } else if ((FoldableOp = getFoldableLogicOp(N0))) { - A = N1; - } else - return false; + SDValue N0, N1, A, FoldableOp; + + // Identify and (optionally) peel an outer NOT that wraps a pure logic tree + auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) { + if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() && + ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) { + SDValue InnerOp = Op->getOperand(0); + + if (!getFoldableLogicOp(InnerOp)) + return SDValue(); + + N0 = InnerOp.getOperand(0); + N1 = InnerOp.getOperand(1); + if ((FoldableOp = getFoldableLogicOp(N1))) { + A = N0; + return InnerOp; + } + if ((FoldableOp = getFoldableLogicOp(N0))) { + A = N1; + return InnerOp; + } + } + return SDValue(); + }; + + bool PeeledOuterNot = false; + SDNode *OriN = N; + if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) { + PeeledOuterNot = true; + N = InnerOp.getNode(); + } else { + N0 = N->getOperand(0); + N1 = N->getOperand(1); + + if ((FoldableOp = getFoldableLogicOp(N1))) + A = N0; + else if ((FoldableOp = getFoldableLogicOp(N0))) + A = N1; + else + return false; + } SDValue B = FoldableOp.getOperand(0); SDValue C = FoldableOp.getOperand(1); @@ -4798,7 +4829,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { case ISD::XOR: Imm ^= TernlogMagicA; break; } - return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm); + if (PeeledOuterNot) + Imm = ~Imm; + + return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm); } /// If the high bits of an 'and' operand are known zero, try setting the diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4dfc400..410f20e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57617,10 +57617,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, } // Fold any similar generic ADD/SUB opcodes to reuse this node. - auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) { + auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) { SDValue Ops[] = {N0, N1}; SDVTList VTs = DAG.getVTList(N->getValueType(0)); - if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) { + if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) { SDValue Op(N, 0); if (Negate) { // Bail if this is only used by a user of the x86 add/sub. @@ -57632,8 +57632,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, DCI.CombineTo(GenericAddSub, Op); } }; - MatchGeneric(LHS, RHS, false); - MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode()); + MatchGeneric(GenericOpc, LHS, RHS, false); + MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode()); + + if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) { + SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); + if (X86ISD::SUB == N->getOpcode()) { + // Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C). + MatchGeneric(ISD::ADD, LHS, NegC, false); + } else { + // Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS). + MatchGeneric(ISD::SUB, NegC, LHS, true); + } + } else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) { + if (X86ISD::SUB == N->getOpcode()) { + SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); + // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C). + MatchGeneric(ISD::ADD, RHS, NegC, true); + } + } // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the // EFLAGS result doesn't change. diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index e28b9c1..b7151f6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1592,7 +1592,6 @@ namespace llvm { bool useLoadStackGuardNode(const Module &M) const override; bool useStackGuardXorFP() const override; void insertSSPDeclarations(Module &M) const override; - Function *getSSPStackGuardCheck(const Module &M) const override; SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const override; diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 37d7772..a61bbe5 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -640,15 +640,6 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const { TargetLowering::insertSSPDeclarations(M); } -Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { - // MSVC CRT has a function to validate security cookie. - if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || - Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { - return M.getFunction("__security_check_cookie"); - } - return TargetLowering::getSSPStackGuardCheck(M); -} - Value * X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { // Android provides a fixed TLS slot for the SafeStack pointer. See the |
