diff options
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86LowerAMXType.cpp | 30 |
3 files changed, 50 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp index 143c4c4..e7709ef 100644 --- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp +++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp @@ -149,6 +149,10 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, }); } + getActionDefinitionsBuilder({G_UMIN, G_UMAX, G_SMIN, G_SMAX}) + .widenScalarToNextPow2(0, /*Min=*/32) + .lower(); + // integer addition/subtraction getActionDefinitionsBuilder({G_ADD, G_SUB}) .legalFor({s8, s16, s32}) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cda5568..931a10b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, // so prevents folding a load into this instruction or making a copy. const int UnpackLoMask[] = {0, 0, 1, 1}; const int UnpackHiMask[] = {2, 2, 3, 3}; - if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) - Mask = UnpackLoMask; - else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) - Mask = UnpackHiMask; + if (!isSingleElementRepeatedMask(Mask)) { + if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) + Mask = UnpackLoMask; + else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) + Mask = UnpackHiMask; + } return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); @@ -45457,7 +45459,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, const SDLoc &DL, const X86Subtarget &Subtarget) { EVT SrcVT = Src.getValueType(); - if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1) + if (Subtarget.useSoftFloat() || !SrcVT.isSimple() || + SrcVT.getScalarType() != MVT::i1) return SDValue(); // Recognize the IR pattern for the movmsk intrinsic under SSE1 before type @@ -58134,6 +58137,14 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) return V; + // Prefer VSHLI to reduce uses, X86FixupInstTunings may revert this depending + // on the scheduler model. Limit multiple users to AVX+ targets to prevent + // introducing extra register moves. + if (Op0 == Op1 && supportedVectorShiftWithImm(VT, Subtarget, ISD::SHL)) + if (Subtarget.hasAVX() || N->isOnlyUserOf(Op0.getNode())) + return getTargetVShiftByConstNode(X86ISD::VSHLI, DL, VT.getSimpleVT(), + Op0, 1, DAG); + // Canonicalize hidden LEA pattern: // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) // iff c < 4 diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index 278ae46..0ba71ad 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -854,6 +854,7 @@ public: : Func(F), SC(ShapeC), DT(nullptr) {} bool combineCastStore(IntrinsicInst *Cast, StoreInst *ST); bool combineLoadCast(IntrinsicInst *Cast, LoadInst *LD); + bool combineTilezero(IntrinsicInst *Cast); bool combineLdSt(SmallVectorImpl<Instruction *> &Casts); bool combineAMXcast(TargetLibraryInfo *TLI); bool transformAMXCast(IntrinsicInst *AMXCast); @@ -1175,6 +1176,26 @@ bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) { return EraseLoad; } +// %19 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> zeroinitializer) +// --> +// %19 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col) +bool X86LowerAMXCast::combineTilezero(IntrinsicInst *Cast) { + Value *Row = nullptr, *Col = nullptr; + Use &U = *(Cast->use_begin()); + unsigned OpNo = U.getOperandNo(); + auto *II = cast<IntrinsicInst>(U.getUser()); + if (!isAMXIntrinsic(II)) + return false; + + std::tie(Row, Col) = SC->getShape(II, OpNo); + + IRBuilder<> Builder(Cast); + Value *NewInst = + Builder.CreateIntrinsic(Intrinsic::x86_tilezero_internal, {}, {Row, Col}); + Cast->replaceAllUsesWith(NewInst); + return true; +} + bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) { bool Change = false; for (auto *Cast : Casts) { @@ -1198,6 +1219,14 @@ bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) { for (auto *Store : DeadStores) Store->eraseFromParent(); } else { // x86_cast_vector_to_tile + // %19 = tail call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> zeroinitializer) + // --> + // %19 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col) + if (isa<ConstantAggregateZero>(Cast->getOperand(0))) { + Change |= combineTilezero(cast<IntrinsicInst>(Cast)); + continue; + } + auto *Load = dyn_cast<LoadInst>(Cast->getOperand(0)); if (!Load || !Load->hasOneUse()) continue; @@ -1210,6 +1239,7 @@ bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) { // Set the operand is null so that load instruction can be erased. Cast->setOperand(0, nullptr); Load->eraseFromParent(); + Change = true; } } } |