Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp               | 35 ++++-
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td                   |  5 +
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td                | 12 +
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp              |  1 -
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  | 18 ++-
5 files changed, 65 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8a038f..8457f61 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22308,6 +22308,37 @@ static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
}
+// Attempt to combine the following patterns:
+// SUB x, (CSET LO, (CMP a, b)) -> SBC x, 0, (CMP a, b)
+// SUB (SUB x, y), (CSET LO, (CMP a, b)) -> SBC x, y, (CMP a, b)
+// The CSET may be preceded by a ZEXT.
+static SDValue performSubWithBorrowCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() != ISD::SUB)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse())
+ N1 = N1.getOperand(0);
+ if (!N1.hasOneUse() || getCSETCondCode(N1) != AArch64CC::LO)
+ return SDValue();
+
+ SDValue Flags = N1.getOperand(3);
+ if (Flags.getOpcode() != AArch64ISD::SUBS)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue N0 = N->getOperand(0);
+ if (N0->getOpcode() == ISD::SUB)
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0.getOperand(0),
+ N0.getOperand(1), Flags);
+ return DAG.getNode(AArch64ISD::SBC, DL, VT, N0, DAG.getConstant(0, DL, VT),
+ Flags);
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
@@ -22329,6 +22360,8 @@ static SDValue performAddSubCombine(SDNode *N,
return Val;
if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG))
return Val;
+ if (SDValue Val = performSubWithBorrowCombine(N, DCI.DAG))
+ return Val;
if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
@@ -26050,7 +26083,7 @@ static SDValue performCSELCombine(SDNode *N,
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
- return Folded;
+ return Folded;
// CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
// if SUB(y, x) already exists and we can produce a swapped predicate for cc.
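For reference, the new combine fires on the usual open-coded borrow of a multi-word subtraction. A minimal sketch of C that should reach the second pattern (a hypothetical reproducer; the function and parameter names are not from the patch):

    // 128-bit subtract, high word: the borrow out of the low half is an
    // unsigned compare (CMP + CSET LO after lowering) that then feeds a SUB.
    unsigned long long sub128_hi(unsigned long long ah, unsigned long long al,
                                 unsigned long long bh, unsigned long long bl) {
      unsigned long long borrow = al < bl;
      return ah - bh - borrow;  // SUB (SUB ah, bh), (CSET LO, (CMP al, bl))
    }

On AArch64, CMP sets C to the inverted borrow and SBC computes x - y - (1 - C), so once the combine rewrites the SUB chain the CSET materialization disappears and the tail should select to roughly "cmp x1, x3; sbc x0, x0, x2".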
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 76f076a..b30e3d0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4444,6 +4444,11 @@ defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
[(AArch64Prefetch timm:$Rt,
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
+// PRFM falls back to PRFUM for negative or unaligned offsets (not a multiple
+// of 8).
+def : InstAlias<"prfm $Rt, [$Rn, $offset]",
+ (PRFUMi prfop:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+
//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
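The practical effect is that hand-written prfm no longer has to be spelled prfum when the offset is negative or not a multiple of 8. A hedged illustration via inline assembly (hypothetical; not part of the patch):

    // The scaled PRFM immediate must be a multiple of 8 in [0, 32760];
    // with the alias, other simm9 offsets are accepted and encoded as the
    // unscaled PRFUM instruction instead of being rejected.
    void prefetch_prev(const void *p) {
      __asm__ volatile("prfm pldl1keep, [%0, #-8]" : : "r"(p));
    }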
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e1f4386..65b6077 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3597,6 +3597,18 @@ let Predicates = [HasSVE_or_SME] in {
def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
(SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
+
+ // Extracts of "unsigned" i8 or i16 elements lead to the zero-extend being
+ // transformed to an AND mask. The mask is redundant since UMOV already zeroes
+ // the high bits of the destination register.
+ def : Pat<(i32 (and (vector_extract nxv16i8:$vec, VectorIndexB:$index), 0xff)),
+ (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
+ def : Pat<(i32 (and (vector_extract nxv8i16:$vec, VectorIndexH:$index), 0xffff)),
+ (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)>;
+ def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)))), (i64 0xff))),
+ (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)), sub_32)>;
+ def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)))), (i64 0xffff))),
+ (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index)), sub_32)>;
} // End HasNEON
// Extract first element from vector.
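A hypothetical reproducer for the new patterns, assuming Clang's fixed-length SVE extension (-msve-vector-bits=128) so that the lane index is a compile-time constant and element subscripting is available:

    #include <arm_sve.h>
    #include <stdint.h>

    typedef svuint8_t vec128 __attribute__((arm_sve_vector_bits(128)));

    uint32_t lane3(vec128 v) {
      // The zero-extension of the extracted u8 lane appears in the DAG as
      // (and (vector_extract v, 3), 0xff); the patterns above drop the AND
      // because UMOV already zeroes bits [31:8] of the destination register.
      return v[3];
    }

The expected codegen is a single "umov w0, v0.b[3]" with no trailing "and w0, w0, #0xff".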
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 068954f..0bf2b31 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -54,7 +54,6 @@
#include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h"
#include <memory>
#include <optional>
-#include <string>
using namespace llvm;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 14b0f9a..3940246 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5666,6 +5666,9 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
MachineRegisterInfo &MRI) {
LLT DstTy = MRI.getType(Dst);
unsigned DstSize = DstTy.getSizeInBits();
+ assert((DstSize == 64 || DstSize == 128) &&
+ "Unexpected vector constant size");
+
if (CV->isNullValue()) {
if (DstSize == 128) {
auto Mov =
@@ -5735,17 +5738,24 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
// Try to create the new constants with MOVI, and if so generate a fneg
// for it.
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
- Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+ Register NewDst = MRI.createVirtualRegister(
+ DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
NewOp->getOperand(0).setReg(NewDst);
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
}
return nullptr;
};
MachineInstr *R;
- if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
- (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
+ if ((R = TryWithFNeg(DefBits, 32,
+ DstSize == 64 ? AArch64::FNEGv2f32
+ : AArch64::FNEGv4f32)) ||
+ (R = TryWithFNeg(DefBits, 64,
+ DstSize == 64 ? AArch64::FNEGDr
+ : AArch64::FNEGv2f64)) ||
(STI.hasFullFP16() &&
- (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
+ (R = TryWithFNeg(DefBits, 16,
+ DstSize == 64 ? AArch64::FNEGv4f16
+ : AArch64::FNEGv8f16))))
return R;
}
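For the GISel change, a hypothetical reproducer for the 64-bit path (assumed, not taken from the patch): a v2i32 constant whose sign-flipped bit pattern is MOVI-encodable, so emitConstantVector takes the MOVI-plus-FNEG route that previously hard-coded the 128-bit register class and FNEG opcodes:

    #include <arm_neon.h>

    // 0x80000001 per lane is not MOVI-encodable, but clearing the sign bit
    // leaves 0x00000001, which is. The selector can therefore emit
    //   movi v0.2s, #1
    //   fneg v0.2s, v0.2s   // flips the sign bits back on
    // and with this fix the 64-bit case uses FPR64 and FNEGv2f32 rather
    // than the 128-bit register class and FNEGv4f32.
    uint32x2_t make_const(void) {
      return vdup_n_u32(0x80000001u);
    }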