diff options
author | zhijian lin <zhijian@ca.ibm.com> | 2025-07-04 10:07:03 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-04 10:07:03 -0400 |
commit | 45909ec469cea4bc479d5c7d0731dec8e8e91112 (patch) | |
tree | 00e8a416084ba5f0307f8fc6105982520ca062d6 | |
parent | e5cd9bdfea90def36df4f48186a4434306e50a00 (diff) | |
download | llvm-45909ec469cea4bc479d5c7d0731dec8e8e91112.zip llvm-45909ec469cea4bc479d5c7d0731dec8e8e91112.tar.gz llvm-45909ec469cea4bc479d5c7d0731dec8e8e91112.tar.bz2 |
[PowerPC] Use the MTVSRBMI instruction instead of the constant pool on Power10+ (#144084)
The MTVSRBMI instruction sets each byte of a VSR to 0x00 or 0xFF based on
the corresponding bit of its bit mask. Using this instruction instead of a
constant-pool load reduces the asm code size and instruction count on Power10.
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 50 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/mtvsrbmi.ll | 23 |
2 files changed, 54 insertions, 19 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5a4a634..3851f05 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9584,6 +9584,37 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, return false; } +bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) { + unsigned int NumOps = BVN.getNumOperands(); + assert(NumOps > 0 && "Unexpected 0-size build vector"); + + BitMask.clearAllBits(); + EVT VT = BVN.getValueType(0); + APInt ConstValue(VT.getSizeInBits(), 0); + + unsigned EltWidth = VT.getScalarSizeInBits(); + + unsigned BitPos = 0; + for (auto OpVal : BVN.op_values()) { + auto *CN = dyn_cast<ConstantSDNode>(OpVal); + + if (!CN) + return false; + + ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); + BitPos += EltWidth; + } + + for (unsigned J = 0; J < 16; ++J) { + APInt ExtractValue = ConstValue.extractBits(8, J * 8); + if (ExtractValue != 0x00 && ExtractValue != 0xFF) + return false; + if (ExtractValue == 0xFF) + BitMask.setBit(J); + } + return true; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen @@ -9595,6 +9626,25 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + if (Subtarget.hasP10Vector()) { + APInt BitMask(32, 0); + // If the value of the vector is all zeros or all ones, + // we do not convert it to MTVSRBMI. + // The xxleqv instruction sets a vector with all ones. + // The xxlxor instruction sets a vector with all zeros. 
+ if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) { + SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32); + MachineSDNode *MSDNode = + DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant); + SDValue SDV = SDValue(MSDNode, 0); + EVT DVT = BVN->getValueType(0); + EVT SVT = SDV.getValueType(); + if (SVT != DVT) { + SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV); + } + return SDV; + } + } // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll index 5486dc0..232014d 100644 --- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll +++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll @@ -10,28 +10,13 @@ ; RUN: | FileCheck %s --check-prefix=CHECK define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() { -; CHECK: L..CPI0_0: -; CHECK-NEXT: .byte 255 # 0xff -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 -; CHECK-NEXT: .byte 0 # 0x0 +; CHECK-NOT: L..CPI0_0: +; CHECK-NOT: .byte 255 # 0xff +; CHECK-NOT: .byte 0 # 0x0 ; CHECK-LABEL: _Z5v00FFv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lwz r3, L..C0(r2) # %const.0 -; CHECK-NEXT: lxv vs34, 0(r3) +; CHECK-NEXT: mtvsrbmi v2, 1 ; CHECK-NEXT: blr entry: ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> |