Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrFormats.td | 104
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 4
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/SMEInstrFormats.td | 12
-rw-r--r--  llvm/lib/Target/AArch64/SVEInstrFormats.td | 106
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 20
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 25
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 26
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 39
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVGISel.td | 12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp | 4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 16
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp | 25
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 10
24 files changed, 197 insertions, 258 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 70d5ad7d..dc8e7c8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16461,7 +16461,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32));
+ DAG.getTargetConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
@@ -16491,7 +16491,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
- DAG.getConstant(Cnt, DL, MVT::i32), Op->getFlags());
+ DAG.getTargetConstant(Cnt, DL, MVT::i32),
+ Op->getFlags());
}
// Right shift register. Note, there is not a shift right register
@@ -19973,7 +19974,7 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
- Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
+ Op->getOperand(0), DAG.getTargetConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
@@ -20696,7 +20697,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100);
SDValue NewShiftConstant =
- DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
+ DAG.getTargetConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
}
@@ -22373,14 +22374,14 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32));
+ DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32, true));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
return Op;
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
Op = DAG.getNode(Opcode, DL, VT, Op,
- DAG.getConstant(ShiftAmount, DL, MVT::i32));
+ DAG.getTargetConstant(ShiftAmount, DL, MVT::i32));
if (N->getValueType(0) == MVT::i64)
Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
DAG.getConstant(0, DL, MVT::i64));
@@ -23198,7 +23199,7 @@ static SDValue performZExtUZPCombine(SDNode *N, SelectionDAG &DAG) {
Op.getOperand(ExtOffset == 0 ? 0 : 1));
if (Shift != 0)
BC = DAG.getNode(AArch64ISD::VLSHR, DL, VT, BC,
- DAG.getConstant(Shift, DL, MVT::i32));
+ DAG.getTargetConstant(Shift, DL, MVT::i32));
return DAG.getNode(ISD::AND, DL, VT, BC, DAG.getConstant(Mask, DL, VT));
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 6ef0a95..09ce713 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -812,49 +812,49 @@ def fixedpoint_recip_f16_i64 : fixedpoint_recip_i64<f16>;
def fixedpoint_recip_f32_i64 : fixedpoint_recip_i64<f32>;
def fixedpoint_recip_f64_i64 : fixedpoint_recip_i64<f64>;
-def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR8OpValue";
let DecoderMethod = "DecodeVecShiftR8Imm";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16Imm";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR16Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
}]> {
let EncoderMethod = "getVecShiftR16OpValue";
let DecoderMethod = "DecodeVecShiftR16ImmNarrow";
let ParserMatchClass = Imm1_8Operand;
}
-def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32Imm";
let ParserMatchClass = Imm1_32Operand;
}
-def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR32Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
}]> {
let EncoderMethod = "getVecShiftR32OpValue";
let DecoderMethod = "DecodeVecShiftR32ImmNarrow";
let ParserMatchClass = Imm1_16Operand;
}
-def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
let DecoderMethod = "DecodeVecShiftR64Imm";
let ParserMatchClass = Imm1_64Operand;
}
-def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftR64Narrow : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
}]> {
let EncoderMethod = "getVecShiftR64OpValue";
@@ -862,37 +862,6 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
-// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
-}]> {
- let EncoderMethod = "getVecShiftR8OpValue";
- let DecoderMethod = "DecodeVecShiftR8Imm";
- let ParserMatchClass = Imm1_8Operand;
-}
-def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
-}]> {
- let EncoderMethod = "getVecShiftR16OpValue";
- let DecoderMethod = "DecodeVecShiftR16Imm";
- let ParserMatchClass = Imm1_16Operand;
-}
-def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
-}]> {
- let EncoderMethod = "getVecShiftR32OpValue";
- let DecoderMethod = "DecodeVecShiftR32Imm";
- let ParserMatchClass = Imm1_32Operand;
-}
-def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
-}]> {
- let EncoderMethod = "getVecShiftR64OpValue";
- let DecoderMethod = "DecodeVecShiftR64Imm";
- let ParserMatchClass = Imm1_64Operand;
-}
-
def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm1_1Operand : AsmImmRange<1, 1>;
@@ -904,28 +873,28 @@ def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
def Imm0_63Operand : AsmImmRange<0, 63>;
-def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 8);
}]> {
let EncoderMethod = "getVecShiftL8OpValue";
let DecoderMethod = "DecodeVecShiftL8Imm";
let ParserMatchClass = Imm0_7Operand;
}
-def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 16);
}]> {
let EncoderMethod = "getVecShiftL16OpValue";
let DecoderMethod = "DecodeVecShiftL16Imm";
let ParserMatchClass = Imm0_15Operand;
}
-def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 32);
}]> {
let EncoderMethod = "getVecShiftL32OpValue";
let DecoderMethod = "DecodeVecShiftL32Imm";
let ParserMatchClass = Imm0_31Operand;
}
-def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
+def vecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
return (((uint32_t)Imm) < 64);
}]> {
let EncoderMethod = "getVecShiftL64OpValue";
@@ -933,36 +902,6 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63Operand;
}
-// Same as vecshiftL#N, but use TargetConstant (TimmLeaf) instead of Constant
-// (ImmLeaf)
-def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 8);
-}]> {
- let EncoderMethod = "getVecShiftL8OpValue";
- let DecoderMethod = "DecodeVecShiftL8Imm";
- let ParserMatchClass = Imm0_7Operand;
-}
-def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 16);
-}]> {
- let EncoderMethod = "getVecShiftL16OpValue";
- let DecoderMethod = "DecodeVecShiftL16Imm";
- let ParserMatchClass = Imm0_15Operand;
-}
-def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 32);
-}]> {
- let EncoderMethod = "getVecShiftL32OpValue";
- let DecoderMethod = "DecodeVecShiftL32Imm";
- let ParserMatchClass = Imm0_31Operand;
-}
-def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
- return (((uint32_t)Imm) < 64);
-}]> {
- let EncoderMethod = "getVecShiftL64OpValue";
- let DecoderMethod = "DecodeVecShiftL64Imm";
- let ParserMatchClass = Imm0_63Operand;
-}
// Crazy immediate formats used by 32-bit and 64-bit logical immediate
// instructions for splatting repeating bit patterns across the immediate.
@@ -10232,7 +10171,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10240,15 +10179,16 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
} // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10256,7 +10196,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10264,7 +10204,7 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
@@ -10276,7 +10216,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
V64, V64, vecshiftR16,
asm, ".4h", ".4h",
- [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10284,7 +10224,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
V128, V128, vecshiftR16,
asm, ".8h", ".8h",
- [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 vecshiftR16:$imm)))]> {
bits<4> imm;
let Inst{19-16} = imm;
}
@@ -10293,7 +10233,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10301,7 +10241,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
V128, V128, vecshiftR32,
asm, ".4s", ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 vecshiftR32:$imm)))]> {
bits<5> imm;
let Inst{20-16} = imm;
}
@@ -10309,7 +10249,7 @@ multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
V128, V128, vecshiftR64,
asm, ".2d", ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 vecshiftR64:$imm)))]> {
bits<6> imm;
let Inst{21-16} = imm;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 96cc3f3..3e55b76 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2957,9 +2957,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
- if (Order != AtomicOrdering::NotAtomic &&
- Order != AtomicOrdering::Unordered &&
- Order != AtomicOrdering::Monotonic) {
+ if (isStrongerThanMonotonic(Order)) {
assert(!isa<GZExtLoad>(LdSt));
assert(MemSizeInBytes <= 8 &&
"128-bit atomics should already be custom-legalized");
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 6025f1c..63313da 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -556,8 +556,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
unsigned NewOpc =
Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
MachineIRBuilder MIB(MI);
- auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
- MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
+ MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
MI.eraseFromParent();
}
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 539470d..be44b8f 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -4967,7 +4967,7 @@ multiclass sme2_movaz_array_to_vec_vg4_multi<string mnemonic> {
//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<4> imm4;
@@ -4985,7 +4985,7 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
- def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
@@ -5008,20 +5008,20 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
}
multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
- def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
+ def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32,
mnemonic>{
bits<5> imm;
let Inst{20-16} = imm;
}
- def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
+ def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64,
mnemonic> {
bits<6> imm;
let Inst{22} = imm{5};
let Inst{20-16} = imm{4-0};
}
- def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
- def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, vecshiftR32>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, vecshiftR64>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9a23c35..3cdd505 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4436,9 +4436,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
ZPR64, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv2i64, op, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4481,10 +4481,10 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
@@ -4501,10 +4501,10 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
@@ -4546,10 +4546,10 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
@@ -4676,18 +4676,18 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -4717,18 +4717,18 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
- tvecshiftR8>;
+ vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
- tvecshiftR16> {
+ vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
- tvecshiftR32> {
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
@@ -5461,10 +5461,10 @@ multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
let Inst{20-19} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -6443,10 +6443,10 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftL64, !cast<Instruction>(NAME # _D)>;
}
// As above but shift amount takes the form of a "vector immediate".
@@ -6460,15 +6460,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
}
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
- def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
- def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
- def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
- def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
+ def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftL8, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftL16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftL32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftL64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
@@ -6489,10 +6489,10 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
let Inst{9-8} = imm{4-3};
}
- def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
// As above but shift amount takes the form of a "vector immediate".
@@ -6511,10 +6511,10 @@ multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op =
def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, vecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, vecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, vecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, vecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -10031,7 +10031,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte
// SVE2 multi-vec shift narrow
class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<5> Zd;
@@ -10055,7 +10055,7 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
- def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
+ def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, vecshiftR16>;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 2ba3156..9dd64e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -131,10 +131,8 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
-/// Returns true if the function requires the implicit argument be passed
-/// regardless of the function contents.
-static bool funcRequiresHostcallPtr(const Function &F) {
- // Sanitizers require the hostcall buffer passed in the implicit arguments.
+/// Returns true if sanitizer attributes are present on a function.
+static bool hasSanitizerAttributes(const Function &F) {
return F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeThread) ||
F.hasFnAttribute(Attribute::SanitizeMemory) ||
@@ -469,15 +467,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// If the function requires the implicit arg pointer due to sanitizers,
// assume it's needed even if explicitly marked as not requiring it.
- const bool NeedsHostcall = funcRequiresHostcallPtr(*F);
- if (NeedsHostcall) {
+ // Flat scratch initialization is needed because `asan_malloc_impl`
+ // calls introduced later in pipeline will have flat scratch accesses.
+ // FIXME: FLAT_SCRATCH_INIT will not be required here if device-libs
+ // implementation for `asan_malloc_impl` is updated.
+ const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
+ if (HasSanitizerAttrs) {
removeAssumedBits(IMPLICIT_ARG_PTR);
removeAssumedBits(HOSTCALL_PTR);
+ removeAssumedBits(FLAT_SCRATCH_INIT);
}
for (auto Attr : ImplicitAttrs) {
- if (NeedsHostcall &&
- (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))
+ if (HasSanitizerAttrs &&
+ (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
+ Attr.first == FLAT_SCRATCH_INIT))
continue;
if (F->hasFnAttribute(Attr.second))
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 82789bc..90c828b 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -932,7 +932,9 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
for (MachineInstr *SubDef = MRI.getVRegDef(SrcReg);
SubDef && TII.isFoldableCopy(*SubDef);
SubDef = MRI.getVRegDef(Sub->getReg())) {
- MachineOperand &SrcOp = SubDef->getOperand(1);
+ unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
+ MachineOperand &SrcOp = SubDef->getOperand(SrcIdx);
+
if (SrcOp.isImm())
return &SrcOp;
if (!SrcOp.isReg() || SrcOp.getReg().isPhysical())
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 79876ff..e233457 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -18860,31 +18860,6 @@ SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
return Flags;
}
-bool SITargetLowering::checkForPhysRegDependency(
- SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII, MCRegister &PhysReg, int &Cost) const {
- if (User->getOpcode() != ISD::CopyToReg)
- return false;
- if (!Def->isMachineOpcode())
- return false;
- MachineSDNode *MDef = dyn_cast<MachineSDNode>(Def);
- if (!MDef)
- return false;
-
- unsigned ResNo = User->getOperand(Op).getResNo();
- if (User->getOperand(Op)->getValueType(ResNo) != MVT::i1)
- return false;
- const MCInstrDesc &II = TII->get(MDef->getMachineOpcode());
- if (II.isCompare() && II.hasImplicitDefOfPhysReg(AMDGPU::SCC)) {
- PhysReg = AMDGPU::SCC;
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(PhysReg, Def->getSimpleValueType(ResNo));
- Cost = RC->expensiveOrImpossibleToCopy() ? -1 : RC->getCopyCost();
- return true;
- }
- return false;
-}
-
void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a474dab..74e58f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -561,11 +561,6 @@ public:
bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const;
bool denormalsEnabledForType(LLT Ty, const MachineFunction &MF) const;
- bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- MCRegister &PhysReg, int &Cost) const override;
-
bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
const SelectionDAG &DAG, bool SNaN = false,
unsigned Depth = 0) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index cda8069..46757cf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3433,6 +3433,32 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
}
}
+unsigned SIInstrInfo::getFoldableCopySrcIdx(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B16_t16_e32:
+ case AMDGPU::V_MOV_B16_t16_e64:
+ return 2;
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_e32:
+ case AMDGPU::V_MOV_B64_e64:
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO:
+ case AMDGPU::COPY:
+ case AMDGPU::WWM_COPY:
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ case AMDGPU::V_ACCVGPR_READ_B32_e64:
+ case AMDGPU::V_ACCVGPR_MOV_B32:
+ case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
+ case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
+ return 1;
+ default:
+ llvm_unreachable("MI is not a foldable copy");
+ }
+}
+
static constexpr AMDGPU::OpName ModifierOpNames[] = {
AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index a21089f..cc59acf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -417,6 +417,7 @@ public:
const MachineInstr &MIb) const override;
static bool isFoldableCopy(const MachineInstr &MI);
+ static unsigned getFoldableCopySrcIdx(const MachineInstr &MI);
void removeModOperands(MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 5630580..82fc240 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -367,19 +367,6 @@ def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
let BaseClassOrder = 10000;
}
-def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
- let CopyCost = 1;
- let isAllocatable = 0;
- let HasSGPR = 1;
-}
-
-def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16, (add M0_LO16)> {
- let CopyCost = 1;
- let Size = 16;
- let isAllocatable = 0;
- let HasSGPR = 1;
-}
-
// TODO: Do we need to set DwarfRegAlias on register tuples?
def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
@@ -774,12 +761,6 @@ def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", Reg128Types.types, 32,
let BaseClassOrder = 10000;
}
-def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
- (add LDS_DIRECT)> {
- let isAllocatable = 0;
- let CopyCost = -1;
-}
-
let GeneratePressureSet = 0, HasSGPR = 1 in {
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
@@ -797,7 +778,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16, SRC_EXECZ_LO16, SRC_SCC_LO16,
- EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
+ EXEC_LO_LO16, EXEC_HI_LO16, M0_LO16, SRC_FLAT_SCRATCH_BASE_LO_LO16,
SRC_FLAT_SCRATCH_BASE_HI_LO16)> {
let Size = 16;
let isAllocatable = 0;
@@ -805,7 +786,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16, bf16], 16,
}
def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
- (add SReg_32_XM0_XEXEC, M0_CLASS)> {
+ (add SReg_32_XM0_XEXEC, M0)> {
let AllocationPriority = 0;
}
@@ -830,7 +811,7 @@ def APERTURE_Class : SIRegisterClass<"AMDGPU", Reg64Types.types, 32,
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16, i1], 32,
- (add SReg_32_XM0, M0_CLASS)> {
+ (add SReg_32_XM0, M0)> {
let AllocationPriority = 0;
let HasSGPR = 1;
let BaseClassOrder = 32;
@@ -842,7 +823,7 @@ def SGPR_NULL256 : SIReg<"null">;
let GeneratePressureSet = 0 in {
def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
- (add SReg_32, LDS_DIRECT_CLASS)> {
+ (add SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasSGPR = 1;
let Size = 32;
@@ -981,7 +962,7 @@ defm "" : SRegClass<32, Reg1024Types.types, SGPR_1024Regs>;
}
def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
- (add VGPR_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let Size = 32;
@@ -1096,21 +1077,21 @@ def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add)> {
}
def VS_16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add VGPR_16, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_16, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let Size = 16;
}
def VS_16_Lo128 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
- (add VGPR_16_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_16_Lo128, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let Size = 16;
}
def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
- (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_32, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
@@ -1118,7 +1099,7 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v
}
def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
- (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_32_Lo128, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
@@ -1126,7 +1107,7 @@ def VS_32_Lo128 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2
}
def VS_32_Lo256 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
- (add VGPR_32_Lo256, SReg_32, LDS_DIRECT_CLASS)> {
+ (add VGPR_32_Lo256, SReg_32, LDS_DIRECT)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasSGPR = 1;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0040504..a94e131 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -359,6 +359,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
@@ -372,6 +374,8 @@ HexagonTargetLowering::initializeHVXLowering() {
setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
// Boolean vectors.
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 7d4535a..b37b740 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1560,7 +1560,7 @@ static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
// If it's not a grouped vector register, it doesn't have subregister, so
// the base register is just itself.
- if (BaseReg == RISCV::NoRegister)
+ if (!BaseReg.isValid())
BaseReg = Reg;
return BaseReg;
}
diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td
index cf6f83a..7f5d0af 100644
--- a/llvm/lib/Target/RISCV/RISCVGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVGISel.td
@@ -126,13 +126,6 @@ let Predicates = [HasAtomicLdSt, IsRV64] in {
// RV64 i32 patterns not used by SelectionDAG
//===----------------------------------------------------------------------===//
-def uimm5i32 : ImmLeaf<i32, [{return isUInt<5>(Imm);}]>;
-
-def zext_is_sext : PatFrag<(ops node:$src), (zext node:$src), [{
- KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0), 0);
- return Known.isNonNegative();
-}]>;
-
let Predicates = [IsRV64] in {
def : LdPat<extloadi8, LBU, i32>; // Prefer unsigned due to no c.lb in Zcb.
def : LdPat<extloadi16, LH, i32>;
@@ -140,15 +133,10 @@ def : LdPat<extloadi16, LH, i32>;
def : StPat<truncstorei8, SB, GPR, i32>;
def : StPat<truncstorei16, SH, GPR, i32>;
-def : Pat<(anyext (i32 GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
-def : Pat<(i32 (trunc GPR:$src)), (COPY GPR:$src)>;
def : Pat<(sext_inreg (i64 (add GPR:$rs1, simm12_lo:$imm)), i32),
(ADDIW GPR:$rs1, simm12_lo:$imm)>;
-
-// Use sext if the sign bit of the input is 0.
-def : Pat<(zext_is_sext (i32 GPR:$src)), (ADDIW GPR:$src, 0)>;
}
let Predicates = [IsRV64, NoStdExtZba] in
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6a6ead2..cf8d120 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -128,7 +128,7 @@ static bool hasUndefinedPassthru(const MachineInstr &MI) {
// All undefined passthrus should be $noreg: see
// RISCVDAGToDAGISel::doPeepholeNoRegPassThru
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
+ return !UseMO.getReg().isValid() || UseMO.isUndef();
}
/// Return true if \p MI is a copy that will be lowered to one or more vmvNr.vs.
@@ -1454,7 +1454,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
Register Reg = VLOp.getReg();
// Erase the AVL operand from the instruction.
- VLOp.setReg(RISCV::NoRegister);
+ VLOp.setReg(Register());
VLOp.setIsKill(false);
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
@@ -1663,7 +1663,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
if (!MO.isReg() || !MO.getReg().isVirtual())
return;
Register OldVLReg = MO.getReg();
- MO.setReg(RISCV::NoRegister);
+ MO.setReg(Register());
if (LIS)
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1e6b04f8..7db4832 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1364,7 +1364,7 @@ void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
/*RestoreAfter=*/false, /*SpAdj=*/0,
/*AllowSpill=*/false);
- if (TmpGPR != RISCV::NoRegister)
+ if (TmpGPR.isValid())
RS->setRegUsed(TmpGPR);
else {
// The case when there is no scavenged register needs special handling.
@@ -3021,7 +3021,7 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Invalid operand type for VL operand";
return false;
}
- if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
+ if (Op.isReg() && Op.getReg().isValid()) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index efdbd12..447f05c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1417,9 +1417,9 @@ class SelectQCbi<CondCode Cond, DAGOperand InTyImm, Pseudo OpNode >
let Predicates = [HasVendorXqciac, IsRV32] in {
def : Pat<(i32 (add GPRNoX0:$rd, (mul GPRNoX0:$rs1, simm12_lo:$imm12))),
(QC_MULIADD GPRNoX0:$rd, GPRNoX0:$rs1, simm12_lo:$imm12)>;
-def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, uimm5gt3:$imm), GPRNoX0:$rs2)),
+def : Pat<(i32 (add_like_non_imm12 (shl GPRNoX0:$rs1, (i32 uimm5gt3:$imm)), GPRNoX0:$rs2)),
(QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
-def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, uimm5gt3:$imm, GPRNoX0:$rs2)),
+def : Pat<(i32 (riscv_shl_add GPRNoX0:$rs1, (i32 uimm5gt3:$imm), GPRNoX0:$rs2)),
(QC_SHLADD GPRNoX0:$rs1, GPRNoX0:$rs2, uimm5gt3:$imm)>;
} // Predicates = [HasVendorXqciac, IsRV32]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
index 1674c95..1dd7332 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZalasr.td
@@ -26,7 +26,7 @@ class LAQ_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class SRL_r<bit aq, bit rl, bits<3> funct3, string opcodestr>
: RVInstRAtomic<0b00111, aq, rl, funct3, OPC_AMO,
- (outs ), (ins GPRMemZeroOffset:$rs1, GPR:$rs2),
+ (outs), (ins GPR:$rs2, GPRMemZeroOffset:$rs1),
opcodestr, "$rs2, $rs1"> {
let rd = 0;
}
@@ -71,7 +71,7 @@ class PatLAQ<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
// while atomic_store has data, addr
class PatSRL<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(OpNode (vt GPR:$rs2), (XLenVT GPRMemZeroOffset:$rs1)),
- (Inst GPRMemZeroOffset:$rs1, GPR:$rs2)>;
+ (Inst GPR:$rs2, GPRMemZeroOffset:$rs1)>;
let Predicates = [HasStdExtZalasr] in {
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index f8d33ae..54569b1 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -259,7 +259,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
if (isCompressibleLoad(MI) || isCompressibleStore(MI)) {
const MachineOperand &MOImm = MI.getOperand(2);
if (!MOImm.isImm())
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
int64_t Offset = MOImm.getImm();
int64_t NewBaseAdjust = getBaseAdjustForCompression(Offset, Opcode);
@@ -292,7 +292,7 @@ static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
}
}
}
- return RegImmPair(RISCV::NoRegister, 0);
+ return RegImmPair(Register(), 0);
}
// Check all uses after FirstMI of the given register, keeping a vector of
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index ffba284..fdf9a4f 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -382,7 +382,7 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -448,7 +448,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
Register FalseReg = MI.getOperand(2).getReg();
if (TruePassthruReg != FalseReg) {
// If True's passthru is undef see if we can change it to False
- if (TruePassthruReg != RISCV::NoRegister ||
+ if (TruePassthruReg.isValid() ||
!MRI->hasOneUse(MI.getOperand(3).getReg()) ||
!ensureDominates(MI.getOperand(2), *True))
return false;
@@ -467,7 +467,7 @@ bool RISCVVectorPeephole::convertSameMaskVMergeToVMv(MachineInstr &MI) {
// vmv.v.v doesn't have a mask operand, so we may be able to inflate the
// register class for the destination and passthru operands e.g. VRNoV0 -> VR
MRI->recomputeRegClass(MI.getOperand(0).getReg());
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
MRI->recomputeRegClass(MI.getOperand(1).getReg());
return true;
}
@@ -517,7 +517,7 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
if (RISCVII::isFirstDefTiedToFirstUse(MaskedMCID)) {
unsigned PassthruOpIdx = MI.getNumExplicitDefs();
if (HasPassthru) {
- if (MI.getOperand(PassthruOpIdx).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(PassthruOpIdx).getReg())
MRI->recomputeRegClass(MI.getOperand(PassthruOpIdx).getReg());
} else
MI.removeOperand(PassthruOpIdx);
@@ -576,7 +576,7 @@ static bool dominates(MachineBasicBlock::const_iterator A,
bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
MachineInstr &Src) const {
assert(MO.getParent()->getParent() == Src.getParent());
- if (!MO.isReg() || MO.getReg() == RISCV::NoRegister)
+ if (!MO.isReg() || !MO.getReg().isValid())
return true;
MachineInstr *Def = MRI->getVRegDef(MO.getReg());
@@ -593,7 +593,7 @@ bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO,
bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
return false;
- if (MI.getOperand(1).getReg() != RISCV::NoRegister)
+ if (MI.getOperand(1).getReg().isValid())
return false;
// If the input was a pseudo with a policy operand, we can give it a tail
@@ -654,7 +654,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
// Src needs to have the same passthru as VMV_V_V
MachineOperand &SrcPassthru = Src->getOperand(Src->getNumExplicitDefs());
- if (SrcPassthru.getReg() != RISCV::NoRegister &&
+ if (SrcPassthru.getReg().isValid() &&
SrcPassthru.getReg() != Passthru.getReg())
return false;
@@ -672,7 +672,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
if (SrcPassthru.getReg() != Passthru.getReg()) {
SrcPassthru.setReg(Passthru.getReg());
// If Src is masked then its passthru needs to be in VRNoV0.
- if (Passthru.getReg() != RISCV::NoRegister)
+ if (Passthru.getReg().isValid())
MRI->constrainRegClass(
Passthru.getReg(),
TII->getRegClass(Src->getDesc(), SrcPassthru.getOperandNo(), TRI));
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
index 7505507..e8c849e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp
@@ -188,8 +188,31 @@ class SPIRVLegalizePointerCast : public FunctionPass {
FixedVectorType *SrcType = cast<FixedVectorType>(Src->getType());
FixedVectorType *DstType =
cast<FixedVectorType>(GR->findDeducedElementType(Dst));
- assert(DstType->getNumElements() >= SrcType->getNumElements());
+ auto dstNumElements = DstType->getNumElements();
+ auto srcNumElements = SrcType->getNumElements();
+
+ // if the element type differs, it is a bitcast.
+ if (DstType->getElementType() != SrcType->getElementType()) {
+ // Support bitcast between vectors of different sizes only if
+ // the total bitwidth is the same.
+ [[maybe_unused]] auto dstBitWidth =
+ DstType->getElementType()->getScalarSizeInBits() * dstNumElements;
+ [[maybe_unused]] auto srcBitWidth =
+ SrcType->getElementType()->getScalarSizeInBits() * srcNumElements;
+ assert(dstBitWidth == srcBitWidth &&
+ "Unsupported bitcast between vectors of different sizes.");
+
+ Src =
+ B.CreateIntrinsic(Intrinsic::spv_bitcast, {DstType, SrcType}, {Src});
+ buildAssignType(B, DstType, Src);
+ SrcType = DstType;
+
+ StoreInst *SI = B.CreateStore(Src, Dst);
+ SI->setAlignment(Alignment);
+ return SI;
+ }
+ assert(DstType->getNumElements() >= SrcType->getNumElements());
LoadInst *LI = B.CreateLoad(DstType, Dst);
LI->setAlignment(Alignment);
Value *OldValues = LI;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 02b20b3..931a10b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13783,10 +13783,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// so prevents folding a load into this instruction or making a copy.
const int UnpackLoMask[] = {0, 0, 1, 1};
const int UnpackHiMask[] = {2, 2, 3, 3};
- if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
- Mask = UnpackLoMask;
- else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
- Mask = UnpackHiMask;
+ if (!isSingleElementRepeatedMask(Mask)) {
+ if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
+ Mask = UnpackLoMask;
+ else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
+ Mask = UnpackHiMask;
+ }
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));