aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp140
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h3
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrP10.td189
3 files changed, 332 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 944a1e2..8bf0d11 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
return SDV;
}
+ // Recognize build vector patterns to emit VSX vector instructions
+ // instead of loading value from memory.
+ if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
+ return VecPat;
}
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
@@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N,
return SDValue();
}
+// The LXVKQ instruction loads a VSX vector with a special quadword value
+// selected by an immediate. This helper classifies a 128-bit constant and
+// returns the match as a tuple of {LXVKQ unsigned IMM value,
+// right_shift_amount}, or std::nullopt when the constant is neither the base
+// quadword pattern nor that pattern shifted right by 127 bits.
+
+// LXVKQPattern: {LXVKQ unsigned IMM value, right_shift_amount}
+using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
+
+static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
+
+ // The LXVKQ instruction loads the quadword value
+ // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000 (16).
+ static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
+ static const uint32_t Uim = 16;
+
+ // Direct LXVKQ match: the constant is the base pattern (no shift needed).
+ if (FullVal == BasePattern)
+ return std::make_tuple(Uim, uint8_t{0});
+
+ // FullVal is 1, i.e. the base pattern logically shifted right by 127 bits.
+ if (FullVal == APInt(128, 1))
+ return std::make_tuple(Uim, uint8_t{127});
+
+ return std::nullopt;
+}
+
+/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
+/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector.
+/// The LXVKQ instruction loads a VSX vector with a special quadword value
+/// based on an immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T]
+/// with the value 0x8000_0000_0000_0000_0000_0000_0000_0000.
+/// This can be used to inline the build vector constants that have the
+/// following patterns:
+///
+/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
+/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
+/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern
+/// is built using a combination of splat and right shift instructions.
+
+SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
+ "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
+
+ // This transformation is only supported for vectors whose elements are
+ // bytes, halfwords, words, or doublewords; any other type is left alone.
+ EVT VT = Op.getValueType();
+ if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
+ VT == MVT::v2i64))
+ return SDValue();
+
+ LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
+ << VT.getEVTString() << "): ";
+ Op->dump());
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ElemBits = VT.getScalarSizeInBits();
+
+ bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
+
+ // Bail out if any operand of the build vector is not a ConstantSDNode.
+ for (const SDValue &Operand : Op.getNode()->op_values()) {
+ if (!isa<ConstantSDNode>(Operand))
+ return SDValue();
+ }
+
+ // Assemble the build vector operands as a 128-bit register value.
+ // We need to reconstruct what the 128-bit register pattern would be
+ // that produces this vector when interpreted with the current endianness.
+ APInt FullVal = APInt::getZero(128);
+
+ for (unsigned Index = 0; Index < NumElems; ++Index) {
+ auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
+
+ // Get element value as raw bits (zero-extended)
+ uint64_t ElemValue = C->getZExtValue();
+
+ // Mask to element size to ensure we only get the relevant bits
+ if (ElemBits < 64)
+ ElemValue &= ((1ULL << ElemBits) - 1);
+
+ // Bit position of this element: lowest bits first on LE, highest on BE.
+ unsigned BitPos =
+ (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
+
+ // Create APInt for the element value and shift it to correct position
+ APInt ElemAPInt(128, ElemValue);
+ ElemAPInt <<= BitPos;
+
+ // Place the element value at the correct bit position
+ FullVal |= ElemAPInt;
+ }
+
+ if (FullVal.isZero() || FullVal.isAllOnes())
+ return SDValue();
+
+ if (auto UIMOpt = getPatternInfo(FullVal)) {
+ const auto &[Uim, ShiftAmount] = *UIMOpt;
+ SDLoc Dl(Op);
+
+ // Generate the LXVKQ instruction if the shift amount is zero.
+ if (ShiftAmount == 0) {
+ SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
+ SDValue LxvkqInstr =
+ SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "combineBVLoadsSpecialValue: Instruction Emitted ";
+ LxvkqInstr.dump());
+ return LxvkqInstr;
+ }
+
+ assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
+
+ // The right shifted pattern can be constructed using a combination of the
+ // XXSPLTIB and VSRQ instructions. VSRQ takes its shift amount from the lower
+ // 7 bits of byte 15, so the XXSPLTIB splat of 255 below serves as both the
+ // value being shifted and the shift-amount operand.
+ SDValue ShiftAmountVec =
+ SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
+ DAG.getTargetConstant(255, Dl, MVT::i32)),
+ 0);
+ // Emit the VSRQ right shift, passing the splat as both of its operands.
+ SDValue ShiftVec = SDValue(
+ DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
+ 0);
+ LLVM_DEBUG(llvm::dbgs()
+ << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
+ ShiftVec.dump());
+ return ShiftVec;
+ }
+ // No patterns matched for build vectors.
+ return SDValue();
+}
+
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 59f3387..880aca7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1472,6 +1472,9 @@ namespace llvm {
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
+ SDValue combineBVLoadsSpecialValue(SDValue Operand,
+ SelectionDAG &DAG) const;
+
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
/// essentially any shuffle of v8i16 vectors that just inserts one element
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 2384959..2d8c633 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> {
126>;
}
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOR(B,C)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), B, C, XOR(B,C), NOT(C),
+// NOT(B), NAND(B,C)}
+// - NOR(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNor<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOR(B,C)) XXEVAL immediate value: 129
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 129>;
+
+ // Pattern: (A ? B : NOR(B,C)) XXEVAL immediate value: 131
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNor Vt:$vB, Vt:$vC)),131>;
+
+ // Pattern: (A ? C : NOR(B,C)) XXEVAL immediate value: 133
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vC, (VNor Vt:$vB, Vt:$vC)),
+ 133>;
+
+ // Pattern: (A ? XOR(B,C) : NOR(B,C)) XXEVAL immediate value: 134
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 134>;
+
+ // Pattern: (A ? NOT(C) : NOR(B,C)) XXEVAL immediate value: 138
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 138>;
+
+ // Pattern: (A ? NOT(B) : NOR(B,C)) XXEVAL immediate value: 140
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNor Vt:$vB, Vt:$vC)),
+ 140>;
+
+ // Pattern: (A ? NAND(B,C) : NOR(B,C)) XXEVAL immediate value: 142
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)),
+ 142>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : EQV(B,C)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NOT(C),
+// NAND(B,C)}
+// - EQV(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectEqv<ValueType Vt>{
+ // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 151>;
+
+ // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 152>;
+
+ // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 154>;
+
+ // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)),
+ 158>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(C)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), B, XOR(B,C), OR(B,C),
+// NOT(B), NAND(B,C)}
+// - NOT(C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotC<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>;
+
+ // Pattern: (A ? B : NOT(C)) XXEVAL immediate value: 163
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>;
+
+ // Pattern: (A ? XOR(B,C) : NOT(C)) XXEVAL immediate value: 166
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>;
+
+ // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>;
+
+ // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>;
+
+ // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(B)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {AND(B,C), C, XOR(B,C), OR(B,C),
+// NOT(C), NAND(B,C)}
+// - NOT(B) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNotB<ValueType Vt>{
+ // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>;
+
+ // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>;
+
+ // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>;
+
+ // Pattern: (A ? OR(B,C) : NOT(B)) XXEVAL immediate value: 199
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>;
+
+ // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202
+ def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>;
+
+ // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>;
+}
+
+// =============================================================================
+// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand
+// This class matches the equivalent Ternary Operation: A ? f(B,C) : NAND(B,C)
+// and emits the corresponding xxeval instruction with the imm value.
+//
+// The patterns implement xxeval vector select operations where:
+// - A is the selector vector
+// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)}
+// - NAND(B,C) is the "false" case op
+// =============================================================================
+multiclass XXEvalTernarySelectNand<ValueType Vt>{
+ // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>;
+
+ // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>;
+
+ // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 230>;
+
+ // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 231>;
+
+ // Pattern: (A ? EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233
+ def : XXEvalPattern<
+ Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)),
+ 233>;
+}
+
let Predicates = [PrefixInstrs, HasP10Vector] in {
let AddedComplexity = 400 in {
def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A,
@@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
defm : XXEvalTernarySelectC<Ty>;
defm : XXEvalTernarySelectXor<Ty>;
defm : XXEvalTernarySelectOr<Ty>;
+ defm : XXEvalTernarySelectNor<Ty>;
+ defm : XXEvalTernarySelectEqv<Ty>;
+ defm : XXEvalTernarySelectNotC<Ty>;
+ defm : XXEvalTernarySelectNotB<Ty>;
+ defm : XXEvalTernarySelectNand<Ty>;
}
// Anonymous patterns to select prefixed VSX loads and stores.