Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel')
-rw-r--r--   llvm/lib/CodeGen/GlobalISel/CallLowering.cpp      11
-rw-r--r--   llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp   172
-rw-r--r--   llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp      20
-rw-r--r--   llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp   79
-rw-r--r--   llvm/lib/CodeGen/GlobalISel/Utils.cpp             12
5 files changed, 246 insertions, 48 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 9ba1782..90a18b86 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -132,9 +132,10 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   unsigned i = 0;
   unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
   for (const auto &Arg : CB.args()) {
-    ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i),
-                    i < NumFixedArgs};
+    ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i)};
     setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
+    if (i >= NumFixedArgs)
+      OrigArg.Flags[0].setVarArg();
 
     // If we have an explicit sret argument that is an Instruction, (i.e., it
     // might point to function-local memory), we can't meaningfully tail-call.
@@ -301,7 +302,7 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
     // double] -> double).
     SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
                            OrigArg.OrigArgIndex, OrigArg.Flags[0],
-                           OrigArg.IsFixed, OrigArg.OrigValue);
+                           OrigArg.OrigValue);
     return;
   }
 
@@ -313,7 +314,7 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
   for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
     Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
     SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex,
-                           OrigArg.Flags[0], OrigArg.IsFixed);
+                           OrigArg.Flags[0]);
     if (NeedsRegBlock)
       SplitArgs.back().Flags[0].setInConsecutiveRegs();
   }
@@ -1098,7 +1099,7 @@ bool CallLowering::checkReturn(CCState &CCInfo, CCAssignFn *Fn) const {
 
   for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
     MVT VT = MVT::getVT(Outs[I].Ty);
-    if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
+    if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], Outs[I].Ty, CCInfo))
      return false;
   }
   return true;
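
The CallLowering change replaces ArgInfo's separate IsFixed field with a VarArg bit carried in the per-argument flags. A minimal standalone sketch of the same bookkeeping follows; ArgFlags here is an illustrative stand-in for LLVM's ISD::ArgFlagsTy, not the real type:

#include <cstdio>
#include <vector>

// Stand-in for the per-argument flags word; upstream this is a bit in
// ISD::ArgFlagsTy set via setVarArg().
struct ArgFlags {
  bool VarArg = false;
  void setVarArg() { VarArg = true; }
};

int main() {
  // A printf-like callee with two fixed parameters and three extra operands.
  unsigned NumFixedArgs = 2;
  std::vector<ArgFlags> Flags(5);
  // Mirrors the loop in lowerCall(): every operand at or past the fixed
  // parameter count is a variadic argument.
  for (unsigned i = 0; i < Flags.size(); ++i)
    if (i >= NumFixedArgs)
      Flags[i].setVarArg();
  for (unsigned i = 0; i < Flags.size(); ++i)
    std::printf("arg %u: %s\n", i, Flags[i].VarArg ? "vararg" : "fixed");
  return 0;
}

Note the inverted polarity: the old code stored i < NumFixedArgs as IsFixed, while the new code sets the flag when i >= NumFixedArgs.
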
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e84ba91..0674f5f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1821,10 +1821,29 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
     return false;
   }
 
+  // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+  // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
+  // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
+  // largest signed integer that fits into the index type, which is the maximum
+  // size of allocated objects according to the IR Language Reference.
+  unsigned PtrAddFlags = MI.getFlags();
+  unsigned LHSPtrAddFlags = Add2Def->getFlags();
+  bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+  bool IsInBounds =
+      PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
+  unsigned Flags = 0;
+  if (IsNoUWrap)
+    Flags |= MachineInstr::MIFlag::NoUWrap;
+  if (IsInBounds) {
+    Flags |= MachineInstr::MIFlag::InBounds;
+    Flags |= MachineInstr::MIFlag::NoUSWrap;
+  }
+
   // Pass the combined immediate to the apply function.
   MatchInfo.Imm = AMNew.BaseOffs;
   MatchInfo.Base = Base;
   MatchInfo.Bank = getRegBank(Imm2);
+  MatchInfo.Flags = Flags;
   return true;
 }
 
@@ -1838,6 +1857,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
   Observer.changingInstr(MI);
   MI.getOperand(1).setReg(MatchInfo.Base);
   MI.getOperand(2).setReg(NewOffset.getReg(0));
+  MI.setFlags(MatchInfo.Flags);
   Observer.changedInstr(MI);
 }
 
@@ -4871,14 +4891,34 @@ bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
   if (!C2)
     return false;
 
+  // If both additions are nuw, the reassociated additions are also nuw.
+  // If the original G_PTR_ADD is additionally nusw, X and C are both not
+  // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
+  // therefore also nusw.
+  // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
+  // the new G_PTR_ADDs are then also inbounds.
+  unsigned PtrAddFlags = MI.getFlags();
+  unsigned AddFlags = RHS->getFlags();
+  bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
+  bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
+  bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
+  unsigned Flags = 0;
+  if (IsNoUWrap)
+    Flags |= MachineInstr::MIFlag::NoUWrap;
+  if (IsNoUSWrap)
+    Flags |= MachineInstr::MIFlag::NoUSWrap;
+  if (IsInBounds)
+    Flags |= MachineInstr::MIFlag::InBounds;
+
   MatchInfo = [=, &MI](MachineIRBuilder &B) {
     LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
     auto NewBase =
-        Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+        Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
     Observer.changingInstr(MI);
     MI.getOperand(1).setReg(NewBase.getReg(0));
     MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+    MI.setFlags(Flags);
     Observer.changedInstr(MI);
   };
   return !reassociationCanBreakAddressingModePattern(MI);
@@ -4897,6 +4937,25 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
     return false;
 
   auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+
+  // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+  // nuw and inbounds (which implies nusw), the offsets are both non-negative,
+  // so the new G_PTR_ADDs are also inbounds.
+  unsigned PtrAddFlags = MI.getFlags();
+  unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
+  bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+  bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
+                                  MachineInstr::MIFlag::NoUSWrap);
+  bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
+                                  MachineInstr::MIFlag::InBounds);
+  unsigned Flags = 0;
+  if (IsNoUWrap)
+    Flags |= MachineInstr::MIFlag::NoUWrap;
+  if (IsNoUSWrap)
+    Flags |= MachineInstr::MIFlag::NoUSWrap;
+  if (IsInBounds)
+    Flags |= MachineInstr::MIFlag::InBounds;
+
   MatchInfo = [=, &MI](MachineIRBuilder &B) {
     // When we change LHSPtrAdd's offset register we might cause it to use a reg
     // before its def. Sink the instruction below the outer PTR_ADD to ensure this
@@ -4907,9 +4966,11 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
     auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
     Observer.changingInstr(MI);
     MI.getOperand(2).setReg(NewCst.getReg(0));
+    MI.setFlags(Flags);
     Observer.changedInstr(MI);
     Observer.changingInstr(*LHSPtrAdd);
     LHSPtrAdd->getOperand(2).setReg(RHSReg);
+    LHSPtrAdd->setFlags(Flags);
     Observer.changedInstr(*LHSPtrAdd);
   };
   return !reassociationCanBreakAddressingModePattern(MI);
@@ -4933,11 +4994,30 @@ bool CombinerHelper::matchReassocFoldConstantsInSubTree(
   if (!C2)
     return false;
 
+  // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+  // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
+  // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
+  // largest signed integer that fits into the index type, which is the maximum
+  // size of allocated objects according to the IR Language Reference.
+  unsigned PtrAddFlags = MI.getFlags();
+  unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
+  bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+  bool IsInBounds =
+      PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
+  unsigned Flags = 0;
+  if (IsNoUWrap)
+    Flags |= MachineInstr::MIFlag::NoUWrap;
+  if (IsInBounds) {
+    Flags |= MachineInstr::MIFlag::InBounds;
+    Flags |= MachineInstr::MIFlag::NoUSWrap;
+  }
+
   MatchInfo = [=, &MI](MachineIRBuilder &B) {
     auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
     Observer.changingInstr(MI);
     MI.getOperand(1).setReg(LHSSrc1);
     MI.getOperand(2).setReg(NewCst.getReg(0));
+    MI.setFlags(Flags);
     Observer.changedInstr(MI);
   };
   return !reassociationCanBreakAddressingModePattern(MI);
@@ -5844,6 +5924,96 @@ void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
   MI.eraseFromParent();
 }
 
+bool CombinerHelper::matchTruncSSatS(MachineInstr &MI,
+                                     Register &MatchInfo) const {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  unsigned NumDstBits = DstTy.getScalarSizeInBits();
+  unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+  if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
+    return false;
+
+  APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
+  APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
+  return mi_match(Src, MRI,
+                  m_GSMin(m_GSMax(m_Reg(MatchInfo),
+                                  m_SpecificICstOrSplat(SignedMin)),
+                          m_SpecificICstOrSplat(SignedMax))) ||
+         mi_match(Src, MRI,
+                  m_GSMax(m_GSMin(m_Reg(MatchInfo),
+                                  m_SpecificICstOrSplat(SignedMax)),
+                          m_SpecificICstOrSplat(SignedMin)));
+}
+
+void CombinerHelper::applyTruncSSatS(MachineInstr &MI,
+                                     Register &MatchInfo) const {
+  Register Dst = MI.getOperand(0).getReg();
+  Builder.buildTruncSSatS(Dst, MatchInfo);
+  MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchTruncSSatU(MachineInstr &MI,
+                                     Register &MatchInfo) const {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  unsigned NumDstBits = DstTy.getScalarSizeInBits();
+  unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+  if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
+    return false;
+  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
+  return mi_match(Src, MRI,
+                  m_GSMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
+                          m_SpecificICstOrSplat(UnsignedMax))) ||
+         mi_match(Src, MRI,
+                  m_GSMax(m_GSMin(m_Reg(MatchInfo),
+                                  m_SpecificICstOrSplat(UnsignedMax)),
+                          m_SpecificICstOrSplat(0))) ||
+         mi_match(Src, MRI,
+                  m_GUMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
+                          m_SpecificICstOrSplat(UnsignedMax)));
+}
+
+void CombinerHelper::applyTruncSSatU(MachineInstr &MI,
+                                     Register &MatchInfo) const {
+  Register Dst = MI.getOperand(0).getReg();
+  Builder.buildTruncSSatU(Dst, MatchInfo);
+  MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchTruncUSatU(MachineInstr &MI,
+                                     MachineInstr &MinMI) const {
+  Register Min = MinMI.getOperand(2).getReg();
+  Register Val = MinMI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT SrcTy = MRI.getType(Val);
+  unsigned NumDstBits = DstTy.getScalarSizeInBits();
+  unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+  assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+  if (!LI || !isLegal({TargetOpcode::G_TRUNC_USAT_U, {DstTy, SrcTy}}))
+    return false;
+  APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
+  return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
+         !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
+}
+
+bool CombinerHelper::matchTruncUSatUToFPTOUISat(MachineInstr &MI,
+                                                MachineInstr &SrcMI) const {
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
+
+  return LI &&
+         isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
+}
+
 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
                                                BuildFnTy &MatchInfo) const {
   unsigned Opc = MI.getOpcode();
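
The three ptr_add reassociation combines above share one flag-merging rule. A self-contained sketch of that rule follows; the enum values are stand-ins, not LLVM's actual MachineInstr::MIFlag encoding:

#include <cassert>

// Stand-in flag bits; only the merging rule matters, not the encoding.
enum : unsigned { NoUWrap = 1u << 0, NoUSWrap = 1u << 1, InBounds = 1u << 2 };

// A flag survives on the combined G_PTR_ADD only if *both* source
// instructions carried it, and a surviving inbounds implies nusw.
unsigned combinePtrAddFlags(unsigned Outer, unsigned Inner) {
  unsigned Flags = 0;
  if (Outer & Inner & NoUWrap)
    Flags |= NoUWrap;
  if (Outer & Inner & InBounds)
    Flags |= InBounds | NoUSWrap;
  return Flags;
}

int main() {
  // Both adds nuw+inbounds: everything is preserved, nusw is implied.
  assert(combinePtrAddFlags(NoUWrap | InBounds, NoUWrap | InBounds) ==
         (NoUWrap | NoUSWrap | InBounds));
  // Only one add is inbounds: the combined add must drop it.
  assert(combinePtrAddFlags(NoUWrap | InBounds, NoUWrap) == NoUWrap);
  return 0;
}

The new matchTruncSSatS/matchTruncSSatU/matchTruncUSatU combines in the same file are independent of this rule: they recognize smin/smax (or umin) clamps to the destination type's value range and replace a truncate of such a clamp with the corresponding saturating-truncate opcode.
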
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bbfae57..541269a 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -13,7 +13,6 @@
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -2209,7 +2208,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
                                                   : TargetOpcode::LIFETIME_END;
 
-    const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(1));
+    const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0));
     if (!AI || !AI->isStaticAlloca())
       return true;
 
@@ -2522,6 +2521,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Opc = ID == Intrinsic::vector_reduce_fadd
               ? TargetOpcode::G_VECREDUCE_SEQ_FADD
               : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+    if (!MRI->getType(VecSrc).isVector())
+      Opc = ID == Intrinsic::vector_reduce_fadd ? TargetOpcode::G_FADD
+                                                : TargetOpcode::G_FMUL;
     MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
                           MachineInstr::copyFlagsFromInstruction(CI));
     return true;
@@ -2556,6 +2558,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
         getOrCreateVReg(*ConstantInt::getTrue(CI.getType())));
     return true;
   case Intrinsic::amdgcn_cs_chain:
+  case Intrinsic::amdgcn_call_whole_wave:
     return translateCallBase(CI, MIRBuilder);
   case Intrinsic::fptrunc_round: {
     uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
@@ -2786,11 +2789,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   if (CI.isInlineAsm())
     return translateInlineAsm(CI, MIRBuilder);
 
-  diagnoseDontCall(CI);
-
   Intrinsic::ID ID = F ? F->getIntrinsicID() : Intrinsic::not_intrinsic;
-  if (!F || ID == Intrinsic::not_intrinsic)
-    return translateCallBase(CI, MIRBuilder);
+  if (!F || ID == Intrinsic::not_intrinsic) {
+    if (translateCallBase(CI, MIRBuilder)) {
+      diagnoseDontCall(CI);
+      return true;
+    }
+    return false;
+  }
 
   assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
 
@@ -3513,7 +3519,7 @@ void IRTranslator::finishPendingPhis() {
     Verifier.setCurrentInst(PI);
 #endif // ifndef NDEBUG
 
-    SmallSet<const MachineBasicBlock *, 16> SeenPreds;
+    SmallPtrSet<const MachineBasicBlock *, 16> SeenPreds;
     for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
       auto IRPred = PI->getIncomingBlock(i);
       ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
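
The vector_reduce_fadd/fmul fix above handles the degenerate case where the "vector" operand is actually a scalar: a sequential reduction over one element collapses to a single scalar operation. A plain C++ model of why that is sound (illustrative only, not LLVM code):

#include <cassert>
#include <vector>

// Sequential (ordered) floating-point reduction, as G_VECREDUCE_SEQ_FADD
// performs: ((Acc + V0) + V1) + ...
double reduceSeqFAdd(double Acc, const std::vector<double> &V) {
  for (double E : V)
    Acc += E;
  return Acc;
}

int main() {
  // With a single source element the reduction is exactly one fadd, which is
  // why the translator may emit G_FADD instead of G_VECREDUCE_SEQ_FADD when
  // the source type is not a vector.
  assert(reduceSeqFAdd(1.5, {2.5}) == 1.5 + 2.5);
  return 0;
}
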
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d9d3569..a435396 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5222,19 +5222,13 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
     InsertVal = MI.getOperand(2).getReg();
 
   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
-
-  // TODO: Handle total scalarization case.
-  if (!NarrowVecTy.isVector())
-    return UnableToLegalize;
-
   LLT VecTy = MRI.getType(SrcVec);
 
   // If the index is a constant, we can really break this down as you would
   // expect, and index into the target size pieces.
-  int64_t IdxVal;
   auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
   if (MaybeCst) {
-    IdxVal = MaybeCst->Value.getSExtValue();
+    uint64_t IdxVal = MaybeCst->Value.getZExtValue();
 
     // Avoid out of bounds indexing the pieces.
     if (IdxVal >= VecTy.getNumElements()) {
       MIRBuilder.buildUndef(DstReg);
@@ -5242,33 +5236,45 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
       return Legalized;
     }
 
-    SmallVector<Register, 8> VecParts;
-    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
+    if (!NarrowVecTy.isVector()) {
+      SmallVector<Register, 8> SplitPieces;
+      extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
+                   VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
+      if (IsInsert) {
+        SplitPieces[IdxVal] = InsertVal;
+        MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
+      } else {
+        MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
+      }
+    } else {
+      SmallVector<Register, 8> VecParts;
+      LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
 
-    // Build a sequence of NarrowTy pieces in VecParts for this operand.
-    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
-                                    TargetOpcode::G_ANYEXT);
+      // Build a sequence of NarrowTy pieces in VecParts for this operand.
+      LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+                                      TargetOpcode::G_ANYEXT);
 
-    unsigned NewNumElts = NarrowVecTy.getNumElements();
+      unsigned NewNumElts = NarrowVecTy.getNumElements();
 
-    LLT IdxTy = MRI.getType(Idx);
-    int64_t PartIdx = IdxVal / NewNumElts;
-    auto NewIdx =
-        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
+      LLT IdxTy = MRI.getType(Idx);
+      int64_t PartIdx = IdxVal / NewNumElts;
+      auto NewIdx =
+          MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
 
-    if (IsInsert) {
-      LLT PartTy = MRI.getType(VecParts[PartIdx]);
+      if (IsInsert) {
+        LLT PartTy = MRI.getType(VecParts[PartIdx]);
 
-      // Use the adjusted index to insert into one of the subvectors.
-      auto InsertPart = MIRBuilder.buildInsertVectorElement(
-          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
-      VecParts[PartIdx] = InsertPart.getReg(0);
+        // Use the adjusted index to insert into one of the subvectors.
+        auto InsertPart = MIRBuilder.buildInsertVectorElement(
+            PartTy, VecParts[PartIdx], InsertVal, NewIdx);
+        VecParts[PartIdx] = InsertPart.getReg(0);
 
-      // Recombine the inserted subvector with the others to reform the result
-      // vector.
-      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
-    } else {
-      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+        // Recombine the inserted subvector with the others to reform the result
+        // vector.
+        buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
+      } else {
+        MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+      }
     }
 
     MI.eraseFromParent();
@@ -5574,12 +5580,19 @@ LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
 
   unsigned NewElemCount =
       NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
-  LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
-
-  // Split the Src and Dst Reg into smaller registers
   SmallVector<Register> SrcVRegs, BitcastVRegs;
-  if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
-    return UnableToLegalize;
+  if (NewElemCount == 1) {
+    LLT SrcNarrowTy = SrcTy.getElementType();
+
+    auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
+    getUnmergeResults(SrcVRegs, *Unmerge);
+  } else {
+    LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
+
+    // Split the Src and Dst Reg into smaller registers
+    if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
+      return UnableToLegalize;
+  }
 
   // Build new smaller bitcast instructions
   // Not supporting Leftover types for now but will have to
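
The new total-scalarization path above (NarrowVecTy being a scalar) reduces G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT with a constant index to picking or replacing one of the split pieces. A self-contained model of the data movement, with plain integers standing in for virtual registers:

#include <cassert>
#include <cstdint>
#include <vector>

using Piece = uint32_t; // stand-in for one scalar piece of the split vector

// G_EXTRACT_VECTOR_ELT after the split: just copy the selected piece.
Piece extractElt(const std::vector<Piece> &Pieces, uint64_t Idx) {
  return Pieces[Idx];
}

// G_INSERT_VECTOR_ELT after the split: overwrite one piece, then re-merge
// (corresponds to buildMergeLikeInstr over SplitPieces).
std::vector<Piece> insertElt(std::vector<Piece> Pieces, Piece Val,
                             uint64_t Idx) {
  Pieces[Idx] = Val;
  return Pieces;
}

int main() {
  std::vector<Piece> V = {10, 20, 30, 40}; // a <4 x s32> split into scalars
  assert(extractElt(V, 2) == 30);
  assert(insertElt(V, 99, 1)[1] == 99);
  return 0;
}

This also suggests why the switch from getSExtValue() to getZExtValue() is safe: the preceding bounds check already rejects any index at or beyond the element count, so the index can be treated as unsigned throughout.
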
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 8955dd0..58d631e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -466,8 +466,14 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
 std::optional<DefinitionAndSourceRegister>
 llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
   Register DefSrcReg = Reg;
-  auto *DefMI = MRI.getVRegDef(Reg);
-  auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
+  // This assumes that the code is in SSA form, so there should only be one
+  // definition.
+  auto DefIt = MRI.def_begin(Reg);
+  if (DefIt == MRI.def_end())
+    return {};
+  MachineOperand &DefOpnd = *DefIt;
+  MachineInstr *DefMI = DefOpnd.getParent();
+  auto DstTy = MRI.getType(DefOpnd.getReg());
   if (!DstTy.isValid())
     return std::nullopt;
   unsigned Opc = DefMI->getOpcode();
@@ -1869,8 +1875,10 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
   case TargetOpcode::G_FSHR:
   case TargetOpcode::G_SMAX:
   case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_SCMP:
   case TargetOpcode::G_UMAX:
   case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_UCMP:
   case TargetOpcode::G_PTRMASK:
   case TargetOpcode::G_SADDO:
   case TargetOpcode::G_SSUBO:
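
The getDefSrcRegIgnoringCopies change above swaps an unconditional getVRegDef for a def_begin()/def_end() walk, so a register without a definition yields an empty result instead of dereferencing a null pointer. A toy model of the guard, with a map standing in for MachineRegisterInfo's def list:

#include <cassert>
#include <map>
#include <optional>
#include <string>

using Register = unsigned;

// Stand-in for "the single defining instruction" of a vreg in SSA form.
std::optional<std::string>
lookUpDef(Register Reg, const std::map<Register, std::string> &Defs) {
  auto It = Defs.find(Reg); // analogous to def_begin(Reg)
  if (It == Defs.end())     // no definition: bail out instead of crashing
    return std::nullopt;
  return It->second;
}

int main() {
  std::map<Register, std::string> Defs = {{1, "G_CONSTANT"}};
  assert(lookUpDef(1, Defs).has_value());
  assert(!lookUpDef(2, Defs).has_value()); // undefined vreg handled gracefully
  return 0;
}
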