diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 140 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrP10.td | 189 |
3 files changed, 332 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 944a1e2..8bf0d11 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9702,6 +9702,10 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } return SDV; } + // Recognize build vector patterns to emit VSX vector instructions + // instead of loading the value from memory. + if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG)) + return VecPat; } // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; @@ -15696,6 +15700,142 @@ combineElementTruncationToVectorTruncation(SDNode *N, return SDValue(); } +// The LXVKQ instruction loads a VSX vector with a special quadword value +// based on an immediate value. This helper method returns the details of the +// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount} +// to help generate the LXVKQ instruction and the subsequent shift instruction +// required to match the original build vector pattern (nullopt if no match). + +// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount} +using LXVKQPattern = std::tuple<uint32_t, uint8_t>; + +static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) { + + // The LXVKQ instruction loads the quadword value: + // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000 + static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64; + static const uint32_t Uim = 16; + + // Check for a direct LXVKQ match (no shift needed). + if (FullVal == BasePattern) + return std::make_tuple(Uim, uint8_t{0}); + + // Check if FullVal is 1 (the result of the base pattern >> 127). + if (FullVal == APInt(128, 1)) + return std::make_tuple(Uim, uint8_t{127}); + + return std::nullopt; +} + +/// Combine vector loads to a single load (using lxvkq) or splat with shift of a +/// constant (xxspltib + vsrq) by recognising patterns in the Build Vector. 
+/// The LXVKQ instruction loads a VSX vector with a special quadword value based +/// on an immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with +/// the value 0x8000_0000_0000_0000_0000_0000_0000_0000. +/// This can be used to inline build vector constants with these patterns: +/// +/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) +/// The MSB pattern is loaded directly with LXVKQ, while the LSB pattern is +/// built with a splat (XXSPLTIB) followed by a right shift (VSRQ). +/// NOTE(review): no subtarget check is done here - confirm callers guard it. + +SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op, + SelectionDAG &DAG) const { + + assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) && + "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue"); + + // This transformation is only supported for vectors whose elements are a + // byte, halfword, word, or doubleword. + EVT VT = Op.getValueType(); + if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 || + VT == MVT::v2i64)) + return SDValue(); + + LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector (" + << VT.getEVTString() << "): "; + Op->dump()); + + unsigned NumElems = VT.getVectorNumElements(); + unsigned ElemBits = VT.getScalarSizeInBits(); + + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + + // Bail out if any operand of the build vector is not a constant. 
+ for (const SDValue &Operand : Op.getNode()->op_values()) { + if (!isa<ConstantSDNode>(Operand)) + return SDValue(); + } + + // Assemble the build vector operands into a 128-bit register value. + // We need to reconstruct what the 128-bit register pattern would be + // that produces this vector when interpreted with the current endianness. + APInt FullVal = APInt::getZero(128); + + for (unsigned Index = 0; Index < NumElems; ++Index) { + auto *C = cast<ConstantSDNode>(Op.getOperand(Index)); + + // Get the element value as raw bits (zero-extended). + uint64_t ElemValue = C->getZExtValue(); + + // Mask to the element size so that only the relevant bits are kept. + if (ElemBits < 64) + ElemValue &= ((1ULL << ElemBits) - 1); + + // Bit position in the register: element 0 is lowest on LE, highest on BE. + unsigned BitPos = + (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits); + + // Create an APInt for the element value and shift it into position. + APInt ElemAPInt(128, ElemValue); + ElemAPInt <<= BitPos; + + // Place the element value at the correct bit position. + FullVal |= ElemAPInt; + } + + if (FullVal.isZero() || FullVal.isAllOnes()) + return SDValue(); + + if (auto UIMOpt = getPatternInfo(FullVal)) { + const auto &[Uim, ShiftAmount] = *UIMOpt; + SDLoc Dl(Op); + + // Generate the LXVKQ instruction if the shift amount is zero. + if (ShiftAmount == 0) { + SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32); + SDValue LxvkqInstr = + SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0); + LLVM_DEBUG(llvm::dbgs() + << "combineBVLoadsSpecialValue: Instruction Emitted "; + LxvkqInstr.dump()); + return LxvkqInstr; + } + + assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value"); + + // The right shifted pattern can be constructed using a combination of + // XXSPLTIB and VSRQ instructions. VSRQ takes the shift amount from the lower + // 7 bits of byte 15, so splatting 255 with XXSPLTIB gives a shift of 127; + // the same all-ones splat is also the shifted value, so the result is 1. 
+ SDValue ShiftAmountVec = + SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32, + DAG.getTargetConstant(255, Dl, MVT::i32)), + 0); + // Shift the all-ones splat right by its own shift amount (127). + SDValue ShiftVec = SDValue( + DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec), + 0); + LLVM_DEBUG(llvm::dbgs() + << "\n combineBVLoadsSpecialValue: Instruction Emitted "; + ShiftVec.dump()); + return ShiftVec; + } + // No patterns matched for build vectors. + return SDValue(); +} + /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 59f3387..880aca7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1472,6 +1472,9 @@ namespace llvm { combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineBVLoadsSpecialValue(SDValue Operand, + SelectionDAG &DAG) const; + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be /// handled by the VINSERTH instruction introduced in ISA 3.0. This is /// essentially any shuffle of v8i16 vectors that just inserts one element diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 2384959..2d8c633 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2404,6 +2404,190 @@ multiclass XXEvalTernarySelectOr<ValueType Vt> { 126>; } +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNor +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOR(B,C) +// and emits the corresponding xxeval instruction with the imm value. 
+// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {B, C, AND(B,C), XOR(B,C), NOT(C), +// NOT(B), NAND(B,C)} +// - NOR(B,C) is the "false" case op +// ============================================================================= +multiclass XXEvalTernarySelectNor<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOR(B,C)) XXEVAL immediate value: 129 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 129>; + + // Pattern: (A ? B : NOR(B,C)) XXEVAL immediate value: 131 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNor Vt:$vB, Vt:$vC)),131>; + + // Pattern: (A ? C : NOR(B,C)) XXEVAL immediate value: 133 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vC, (VNor Vt:$vB, Vt:$vC)), + 133>; + + // Pattern: (A ? XOR(B,C) : NOR(B,C)) XXEVAL immediate value: 134 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 134>; + + // Pattern: (A ? NOT(C) : NOR(B,C)) XXEVAL immediate value: 138 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 138>; + + // Pattern: (A ? NOT(B) : NOR(B,C)) XXEVAL immediate value: 140 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNor Vt:$vB, Vt:$vC)), + 140>; + + // Pattern: (A ? NAND(B,C) : NOR(B,C)) XXEVAL immediate value: 142 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNor Vt:$vB, Vt:$vC)), + 142>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectEqv +// This class matches the equivalent Ternary Operation: A ? f(B,C) : EQV(B,C) +// and emits the corresponding xxeval instruction with the imm value. 
+// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {OR(B,C), NOR(B,C), NOT(C), +// NAND(B,C)} +// - EQV(B,C) is the "false" case op +// ============================================================================= +multiclass XXEvalTernarySelectEqv<ValueType Vt>{ + // Pattern: (A ? OR(B,C) : EQV(B,C)) XXEVAL immediate value: 151 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 151>; + + // Pattern: (A ? NOR(B,C) : EQV(B,C)) XXEVAL immediate value: 152 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 152>; + + // Pattern: (A ? NOT(C) : EQV(B,C)) XXEVAL immediate value: 154 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 154>; + + // Pattern: (A ? NAND(B,C) : EQV(B,C)) XXEVAL immediate value: 158 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VEqv Vt:$vB, Vt:$vC)), + 158>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotC +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(C) +// and emits the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C), +// B, NOT(B)} +// - NOT(C) is the "false" case op +// ============================================================================= +multiclass XXEvalTernarySelectNotC<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOT(C)) XXEVAL immediate value: 161 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 161>; + + // Pattern: (A ? 
B : NOT(C)) XXEVAL immediate value: 163 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vB, (VNot Vt:$vC)), 163>; + + // Pattern: (A ? XOR(B,C) : NOT(C)) XXEVAL immediate value: 166 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 166>; + + // Pattern: (A ? OR(B,C) : NOT(C)) XXEVAL immediate value: 167 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 167>; + + // Pattern: (A ? NOT(B) : NOT(C)) XXEVAL immediate value: 172 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VNot Vt:$vC)), 172>; + + // Pattern: (A ? NAND(B,C) : NOT(C)) XXEVAL immediate value: 174 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vC)), 174>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNotB +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NOT(B) +// and emits the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {AND(B,C), OR(B,C), XOR(B,C), NAND(B,C), +// C, NOT(C)} +// - NOT(B) is the "false" case op +// ============================================================================= +multiclass XXEvalTernarySelectNotB<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : NOT(B)) XXEVAL immediate value: 193 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 193>; + + // Pattern: (A ? C : NOT(B)) XXEVAL immediate value: 197 + def : XXEvalPattern<Vt, (vselect Vt:$vA, Vt:$vC, (VNot Vt:$vB)), 197>; + + // Pattern: (A ? XOR(B,C) : NOT(B)) XXEVAL immediate value: 198 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 198>; + + // Pattern: (A ? 
OR(B,C) : NOT(B)) XXEVAL immediate value: 199 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 199>; + + // Pattern: (A ? NOT(C) : NOT(B)) XXEVAL immediate value: 202 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VNot Vt:$vB)), 202>; + + // Pattern: (A ? NAND(B,C) : NOT(B)) XXEVAL immediate value: 206 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), (VNot Vt:$vB)), 206>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectNand +// This class matches the equivalent Ternary Operation: A ? f(B,C) : NAND(B,C) +// and emits the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op in set {B, C, XOR(B,C), OR(B,C), EQV(B,C)} +// - NAND(B,C) is the "false" case op +// ============================================================================= +multiclass XXEvalTernarySelectNand<ValueType Vt>{ + // Pattern: (A ? B : NAND(B,C)) XXEVAL immediate value: 227 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vB, (VNand Vt:$vB, Vt:$vC)), 227>; + + // Pattern: (A ? C : NAND(B,C)) XXEVAL immediate value: 229 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, Vt:$vC, (VNand Vt:$vB, Vt:$vC)), 229>; + + // Pattern: (A ? XOR(B,C) : NAND(B,C)) XXEVAL immediate value: 230 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 230>; + + // Pattern: (A ? OR(B,C) : NAND(B,C)) XXEVAL immediate value: 231 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VOr Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 231>; + + // Pattern: (A ? 
EQV(B,C) : NAND(B,C)) XXEVAL immediate value: 233 + def : XXEvalPattern< + Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VNand Vt:$vB, Vt:$vC)), + 233>; +} + let Predicates = [PrefixInstrs, HasP10Vector] in { let AddedComplexity = 400 in { def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, @@ -2519,6 +2703,11 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { defm : XXEvalTernarySelectC<Ty>; defm : XXEvalTernarySelectXor<Ty>; defm : XXEvalTernarySelectOr<Ty>; + defm : XXEvalTernarySelectNor<Ty>; + defm : XXEvalTernarySelectEqv<Ty>; + defm : XXEvalTernarySelectNotC<Ty>; + defm : XXEvalTernarySelectNotB<Ty>; + defm : XXEvalTernarySelectNand<Ty>; } // Anonymous patterns to select prefixed VSX loads and stores. |