Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel')
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp    |  11
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp  | 172
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp    |  20
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp |  79
 llvm/lib/CodeGen/GlobalISel/Utils.cpp           |  12
 5 files changed, 246 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 9ba1782..90a18b86 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -132,9 +132,10 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
unsigned i = 0;
unsigned NumFixedArgs = CB.getFunctionType()->getNumParams();
for (const auto &Arg : CB.args()) {
- ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i),
- i < NumFixedArgs};
+ ArgInfo OrigArg{ArgRegs[i], *Arg.get(), i, getAttributesForArgIdx(CB, i)};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB);
+ if (i >= NumFixedArgs)
+ OrigArg.Flags[0].setVarArg();
// If we have an explicit sret argument that is an Instruction, (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
@@ -301,7 +302,7 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// double] -> double).
SplitArgs.emplace_back(OrigArg.Regs[0], SplitVTs[0].getTypeForEVT(Ctx),
OrigArg.OrigArgIndex, OrigArg.Flags[0],
- OrigArg.IsFixed, OrigArg.OrigValue);
+ OrigArg.OrigValue);
return;
}
@@ -313,7 +314,7 @@ void CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.OrigArgIndex,
- OrigArg.Flags[0], OrigArg.IsFixed);
+ OrigArg.Flags[0]);
if (NeedsRegBlock)
SplitArgs.back().Flags[0].setInConsecutiveRegs();
}
@@ -1098,7 +1099,7 @@ bool CallLowering::checkReturn(CCState &CCInfo,
CCAssignFn *Fn) const {
for (unsigned I = 0, E = Outs.size(); I < E; ++I) {
MVT VT = MVT::getVT(Outs[I].Ty);
- if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], CCInfo))
+ if (Fn(I, VT, VT, CCValAssign::Full, Outs[I].Flags[0], Outs[I].Ty, CCInfo))
return false;
}
return true;
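The ArgInfo change above moves per-argument variadic-ness out of a dedicated IsFixed field and into the argument's ISD::ArgFlagsTy. A minimal consumer sketch, assuming ArgFlagsTy pairs the setVarArg() used above with a matching isVarArg() accessor; pickAssignFnForArg is a hypothetical helper, not in-tree API:

    #include "llvm/CodeGen/CallingConvLower.h"
    #include "llvm/CodeGen/GlobalISel/CallLowering.h"
    using namespace llvm;

    // Hypothetical: pick the per-argument CC assignment function from the
    // flag rather than the removed ArgInfo::IsFixed field.
    static CCAssignFn *pickAssignFnForArg(const CallLowering::ArgInfo &Arg,
                                          CCAssignFn *FixedFn,
                                          CCAssignFn *VarArgFn) {
      return Arg.Flags[0].isVarArg() ? VarArgFn : FixedFn;
    }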
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e84ba91..0674f5f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1821,10 +1821,29 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
return false;
}
+ // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+ // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
+ // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
+ // largest signed integer that fits into the index type, which is the maximum
+ // size of allocated objects according to the IR Language Reference.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned LHSPtrAddFlags = Add2Def->getFlags();
+ bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsInBounds =
+ PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsInBounds) {
+ Flags |= MachineInstr::MIFlag::InBounds;
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ }
+
// Pass the combined immediate to the apply function.
MatchInfo.Imm = AMNew.BaseOffs;
MatchInfo.Base = Base;
MatchInfo.Bank = getRegBank(Imm2);
+ MatchInfo.Flags = Flags;
return true;
}
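The flag handling introduced here recurs in the reassociation combines below; the same intersection logic, distilled into a standalone sketch (the enum values are illustrative, not MachineInstr's real MIFlag encoding):

    #include <cassert>

    namespace sketch {
    enum MIFlag : unsigned {
      NoUWrap = 1u << 0,  // no unsigned wrap
      NoUSWrap = 1u << 1, // no unsigned signed wrap
      InBounds = 1u << 2, // result stays within the underlying object
    };

    // Folding (ptr_add (ptr_add Base, Imm1), Imm2) into one ptr_add: nuw
    // survives only if both adds carried it; inbounds survives only if both
    // were inbounds, and nusw can then be re-derived because Imm1+Imm2 cannot
    // exceed the maximum object size.
    unsigned combinePtrAddChainFlags(unsigned Outer, unsigned Inner) {
      unsigned Flags = 0;
      if (Outer & Inner & NoUWrap)
        Flags |= NoUWrap;
      if (Outer & Inner & InBounds)
        Flags |= InBounds | NoUSWrap;
      return Flags;
    }
    } // namespace sketch

    int main() {
      using namespace sketch;
      // nuw is dropped because only the outer add had it; inbounds is kept.
      assert(combinePtrAddChainFlags(NoUWrap | InBounds, InBounds) ==
             (InBounds | NoUSWrap));
      return 0;
    }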
@@ -1838,6 +1857,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
Observer.changingInstr(MI);
MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewOffset.getReg(0));
+ MI.setFlags(MatchInfo.Flags);
Observer.changedInstr(MI);
}
@@ -4871,14 +4891,34 @@ bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
if (!C2)
return false;
+ // If both additions are nuw, the reassociated additions are also nuw.
+ // If the original G_PTR_ADD is additionally nusw, X and C are both not
+ // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
+ // therefore also nusw.
+ // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
+ // the new G_PTR_ADDs are then also inbounds.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned AddFlags = RHS->getFlags();
+ bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
+ bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsNoUSWrap)
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ if (IsInBounds)
+ Flags |= MachineInstr::MIFlag::InBounds;
+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
auto NewBase =
- Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(NewBase.getReg(0));
MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ MI.setFlags(Flags);
Observer.changedInstr(MI);
};
return !reassociationCanBreakAddressingModePattern(MI);
@@ -4897,6 +4937,25 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
return false;
auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+
+ // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+ // nuw and inbounds (which implies nusw), the offsets are both non-negative,
+ // so the new G_PTR_ADDs are also inbounds.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
+ bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
+ MachineInstr::MIFlag::NoUSWrap);
+ bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
+ MachineInstr::MIFlag::InBounds);
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsNoUSWrap)
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ if (IsInBounds)
+ Flags |= MachineInstr::MIFlag::InBounds;
+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
// When we change LHSPtrAdd's offset register we might cause it to use a reg
// before its def. Sink the instruction below the outer PTR_ADD to ensure this
@@ -4907,9 +4966,11 @@ bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
Observer.changingInstr(MI);
MI.getOperand(2).setReg(NewCst.getReg(0));
+ MI.setFlags(Flags);
Observer.changedInstr(MI);
Observer.changingInstr(*LHSPtrAdd);
LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ LHSPtrAdd->setFlags(Flags);
Observer.changedInstr(*LHSPtrAdd);
};
return !reassociationCanBreakAddressingModePattern(MI);
@@ -4933,11 +4994,30 @@ bool CombinerHelper::matchReassocFoldConstantsInSubTree(
if (!C2)
return false;
+ // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
+ // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
+ // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
+ // largest signed integer that fits into the index type, which is the maximum
+ // size of allocated objects according to the IR Language Reference.
+ unsigned PtrAddFlags = MI.getFlags();
+ unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
+ bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
+ bool IsInBounds =
+ PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
+ unsigned Flags = 0;
+ if (IsNoUWrap)
+ Flags |= MachineInstr::MIFlag::NoUWrap;
+ if (IsInBounds) {
+ Flags |= MachineInstr::MIFlag::InBounds;
+ Flags |= MachineInstr::MIFlag::NoUSWrap;
+ }
+
MatchInfo = [=, &MI](MachineIRBuilder &B) {
auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(LHSSrc1);
MI.getOperand(2).setReg(NewCst.getReg(0));
+ MI.setFlags(Flags);
Observer.changedInstr(MI);
};
return !reassociationCanBreakAddressingModePattern(MI);
@@ -5844,6 +5924,96 @@ void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) const {
MI.eraseFromParent();
}
+bool CombinerHelper::matchTruncSSatS(MachineInstr &MI,
+ Register &MatchInfo) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ unsigned NumDstBits = DstTy.getScalarSizeInBits();
+ unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+ assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+ if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
+ return false;
+
+ APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
+ APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
+ return mi_match(Src, MRI,
+ m_GSMin(m_GSMax(m_Reg(MatchInfo),
+ m_SpecificICstOrSplat(SignedMin)),
+ m_SpecificICstOrSplat(SignedMax))) ||
+ mi_match(Src, MRI,
+ m_GSMax(m_GSMin(m_Reg(MatchInfo),
+ m_SpecificICstOrSplat(SignedMax)),
+ m_SpecificICstOrSplat(SignedMin)));
+}
+
+void CombinerHelper::applyTruncSSatS(MachineInstr &MI,
+ Register &MatchInfo) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Builder.buildTruncSSatS(Dst, MatchInfo);
+ MI.eraseFromParent();
+}
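For reference, the smin/smax clamp recognized by matchTruncSSatS is exactly a signed-saturating truncate; a host-side check of the semantics for the 32-to-8-bit case (plain C++, independent of GlobalISel):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // trunc(smin(smax(x, -128), 127)) == signed saturation of x into int8_t.
    int8_t truncSSatS8(int32_t X) {
      int32_t Clamped = std::min(std::max(X, int32_t{-128}), int32_t{127});
      return static_cast<int8_t>(Clamped); // truncation is now lossless
    }

    int main() {
      assert(truncSSatS8(1000) == 127);   // clamped to the signed max
      assert(truncSSatS8(-1000) == -128); // clamped to the signed min
      assert(truncSSatS8(42) == 42);      // in range: unchanged
      return 0;
    }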
+
+bool CombinerHelper::matchTruncSSatU(MachineInstr &MI,
+ Register &MatchInfo) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ unsigned NumDstBits = DstTy.getScalarSizeInBits();
+ unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+ assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+ if (!LI || !isLegal({TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
+ return false;
+ APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
+ return mi_match(Src, MRI,
+ m_GSMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
+ m_SpecificICstOrSplat(UnsignedMax))) ||
+ mi_match(Src, MRI,
+ m_GSMax(m_GSMin(m_Reg(MatchInfo),
+ m_SpecificICstOrSplat(UnsignedMax)),
+ m_SpecificICstOrSplat(0))) ||
+ mi_match(Src, MRI,
+ m_GUMin(m_GSMax(m_Reg(MatchInfo), m_SpecificICstOrSplat(0)),
+ m_SpecificICstOrSplat(UnsignedMax)));
+}
+
+void CombinerHelper::applyTruncSSatU(MachineInstr &MI,
+ Register &MatchInfo) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Builder.buildTruncSSatU(Dst, MatchInfo);
+ MI.eraseFromParent();
+}
+
+bool CombinerHelper::matchTruncUSatU(MachineInstr &MI,
+ MachineInstr &MinMI) const {
+ Register Min = MinMI.getOperand(2).getReg();
+ Register Val = MinMI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(Val);
+ unsigned NumDstBits = DstTy.getScalarSizeInBits();
+ unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+ assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
+
+ if (!LI || !isLegal({TargetOpcode::G_TRUNC_USAT_U, {DstTy, SrcTy}}))
+ return false;
+ APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
+ return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
+ !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
+}
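matchTruncUSatU covers the case where only an unsigned upper clamp is present; the trailing !mi_match guard defers inputs that were also smax'ed to the G_TRUNC_SSAT_U pattern above. The equivalent host-side semantics for the 32-to-8-bit case (a sketch):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // trunc(umin(x, 255)) == unsigned saturation of an unsigned x into uint8_t.
    uint8_t truncUSatU8(uint32_t X) {
      return static_cast<uint8_t>(std::min(X, uint32_t{255}));
    }

    int main() {
      assert(truncUSatU8(300) == 255); // clamped to the unsigned max
      assert(truncUSatU8(7) == 7);     // in range: unchanged
      return 0;
    }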
+
+bool CombinerHelper::matchTruncUSatUToFPTOUISat(MachineInstr &MI,
+ MachineInstr &SrcMI) const {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
+
+ return LI &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
+}
+
bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
BuildFnTy &MatchInfo) const {
unsigned Opc = MI.getOpcode();
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index bbfae57..541269a 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -13,7 +13,6 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -2209,7 +2208,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
: TargetOpcode::LIFETIME_END;
- const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(1));
+ const AllocaInst *AI = dyn_cast<AllocaInst>(CI.getArgOperand(0));
if (!AI || !AI->isStaticAlloca())
return true;
@@ -2522,6 +2521,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Opc = ID == Intrinsic::vector_reduce_fadd
? TargetOpcode::G_VECREDUCE_SEQ_FADD
: TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+ if (!MRI->getType(VecSrc).isVector())
+ Opc = ID == Intrinsic::vector_reduce_fadd ? TargetOpcode::G_FADD
+ : TargetOpcode::G_FMUL;
MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
MachineInstr::copyFlagsFromInstruction(CI));
return true;
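The new scalar fallback is sound because a sequential floating-point reduction over a single-element source is a single ordered operation; in scalar terms (a semantics sketch, not translator code):

    // Sequential (ordered) fadd reduction: ((Acc + V[0]) + V[1]) + ...
    // With N == 1 this degenerates to one fadd, matching the G_FADD the
    // translator now emits when the reduction source is not a vector type.
    float reduceFAddSeq(float Acc, const float *V, unsigned N) {
      for (unsigned I = 0; I != N; ++I)
        Acc += V[I];
      return Acc;
    }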
@@ -2556,6 +2558,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*ConstantInt::getTrue(CI.getType())));
return true;
case Intrinsic::amdgcn_cs_chain:
+ case Intrinsic::amdgcn_call_whole_wave:
return translateCallBase(CI, MIRBuilder);
case Intrinsic::fptrunc_round: {
uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
@@ -2786,11 +2789,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (CI.isInlineAsm())
return translateInlineAsm(CI, MIRBuilder);
- diagnoseDontCall(CI);
-
Intrinsic::ID ID = F ? F->getIntrinsicID() : Intrinsic::not_intrinsic;
- if (!F || ID == Intrinsic::not_intrinsic)
- return translateCallBase(CI, MIRBuilder);
+ if (!F || ID == Intrinsic::not_intrinsic) {
+ if (translateCallBase(CI, MIRBuilder)) {
+ diagnoseDontCall(CI);
+ return true;
+ }
+ return false;
+ }
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
@@ -3513,7 +3519,7 @@ void IRTranslator::finishPendingPhis() {
Verifier.setCurrentInst(PI);
#endif // ifndef NDEBUG
- SmallSet<const MachineBasicBlock *, 16> SeenPreds;
+ SmallPtrSet<const MachineBasicBlock *, 16> SeenPreds;
for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
auto IRPred = PI->getIncomingBlock(i);
ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d9d3569..a435396 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5222,19 +5222,13 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
InsertVal = MI.getOperand(2).getReg();
Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
-
- // TODO: Handle total scalarization case.
- if (!NarrowVecTy.isVector())
- return UnableToLegalize;
-
LLT VecTy = MRI.getType(SrcVec);
// If the index is a constant, we can really break this down as you would
// expect, and index into the target size pieces.
- int64_t IdxVal;
auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
if (MaybeCst) {
- IdxVal = MaybeCst->Value.getSExtValue();
+ uint64_t IdxVal = MaybeCst->Value.getZExtValue();
// Avoid out of bounds indexing the pieces.
if (IdxVal >= VecTy.getNumElements()) {
MIRBuilder.buildUndef(DstReg);
@@ -5242,33 +5236,45 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
return Legalized;
}
- SmallVector<Register, 8> VecParts;
- LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
+ if (!NarrowVecTy.isVector()) {
+ SmallVector<Register, 8> SplitPieces;
+ extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
+ VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
+ if (IsInsert) {
+ SplitPieces[IdxVal] = InsertVal;
+ MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
+ } else {
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
+ }
+ } else {
+ SmallVector<Register, 8> VecParts;
+ LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
- // Build a sequence of NarrowTy pieces in VecParts for this operand.
- LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
- TargetOpcode::G_ANYEXT);
+ // Build a sequence of NarrowTy pieces in VecParts for this operand.
+ LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
+ TargetOpcode::G_ANYEXT);
- unsigned NewNumElts = NarrowVecTy.getNumElements();
+ unsigned NewNumElts = NarrowVecTy.getNumElements();
- LLT IdxTy = MRI.getType(Idx);
- int64_t PartIdx = IdxVal / NewNumElts;
- auto NewIdx =
- MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
+ LLT IdxTy = MRI.getType(Idx);
+ int64_t PartIdx = IdxVal / NewNumElts;
+ auto NewIdx =
+ MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
- if (IsInsert) {
- LLT PartTy = MRI.getType(VecParts[PartIdx]);
+ if (IsInsert) {
+ LLT PartTy = MRI.getType(VecParts[PartIdx]);
- // Use the adjusted index to insert into one of the subvectors.
- auto InsertPart = MIRBuilder.buildInsertVectorElement(
- PartTy, VecParts[PartIdx], InsertVal, NewIdx);
- VecParts[PartIdx] = InsertPart.getReg(0);
+ // Use the adjusted index to insert into one of the subvectors.
+ auto InsertPart = MIRBuilder.buildInsertVectorElement(
+ PartTy, VecParts[PartIdx], InsertVal, NewIdx);
+ VecParts[PartIdx] = InsertPart.getReg(0);
- // Recombine the inserted subvector with the others to reform the result
- // vector.
- buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
- } else {
- MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+ // Recombine the inserted subvector with the others to reform the result
+ // vector.
+ buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
+ } else {
+ MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
+ }
}
MI.eraseFromParent();
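The total-scalarization path added above splits the vector into scalar pieces, patches or reads the piece at the constant index, and remerges; the same shape on plain arrays (illustrative only):

    #include <array>
    #include <cassert>
    #include <cstddef>

    // G_INSERT_VECTOR_ELT, fully scalarized: split, overwrite one piece, remerge.
    template <typename T, std::size_t N>
    std::array<T, N> insertElt(std::array<T, N> Pieces, T Val, std::size_t Idx) {
      Pieces[Idx] = Val; // SplitPieces[IdxVal] = InsertVal
      return Pieces;     // buildMergeLikeInstr over the pieces
    }

    int main() {
      std::array<int, 4> V{1, 2, 3, 4};
      assert(insertElt(V, 9, 2)[2] == 9); // a later extract sees the insert
      return 0;
    }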
@@ -5574,12 +5580,19 @@ LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
unsigned NewElemCount =
NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
- LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
-
- // Split the Src and Dst Reg into smaller registers
SmallVector<Register> SrcVRegs, BitcastVRegs;
- if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
- return UnableToLegalize;
+ if (NewElemCount == 1) {
+ LLT SrcNarrowTy = SrcTy.getElementType();
+
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
+ getUnmergeResults(SrcVRegs, *Unmerge);
+ } else {
+ LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
+
+ // Split the Src and Dst Reg into smaller registers
+ if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
+ return UnableToLegalize;
+ }
// Build new smaller bitcast instructions
// Not supporting Leftover types for now but will have to
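The single-element special case avoids asking extractGCDType for a 1-element vector by unmerging straight to scalars. Splitting one wide bitcast into per-piece bitcasts is sound because a bitcast only reinterprets bits; a host-side check (sketch):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // Bitcast "<2 x s32> -> s64" done whole versus piecewise over s32 chunks.
      uint32_t Src[2] = {0x11223344u, 0x55667788u};
      uint64_t Whole = 0, Pieces = 0;
      std::memcpy(&Whole, Src, sizeof(Whole));
      std::memcpy(reinterpret_cast<unsigned char *>(&Pieces), &Src[0], 4);
      std::memcpy(reinterpret_cast<unsigned char *>(&Pieces) + 4, &Src[1], 4);
      assert(Whole == Pieces); // identical bytes either way
      return 0;
    }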
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 8955dd0..58d631e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -466,8 +466,14 @@ llvm::getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI) {
std::optional<DefinitionAndSourceRegister>
llvm::getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI) {
Register DefSrcReg = Reg;
- auto *DefMI = MRI.getVRegDef(Reg);
- auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
+ // This assumes that the code is in SSA form, so there should only be one
+ // definition.
+ auto DefIt = MRI.def_begin(Reg);
+ if (DefIt == MRI.def_end())
+ return {};
+ MachineOperand &DefOpnd = *DefIt;
+ MachineInstr *DefMI = DefOpnd.getParent();
+ auto DstTy = MRI.getType(DefOpnd.getReg());
if (!DstTy.isValid())
return std::nullopt;
unsigned Opc = DefMI->getOpcode();
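With the defensive lookup above, a register that has no definition at all now yields an empty optional instead of dereferencing a null MachineInstr; callers are otherwise unchanged (usage sketch against the existing DefinitionAndSourceRegister fields):

    // Safe even if Reg has no defining instruction yet.
    if (auto DefSrc = getDefSrcRegIgnoringCopies(Reg, MRI)) {
      MachineInstr *Def = DefSrc->MI; // defining instruction behind any copies
      Register Src = DefSrc->Reg;     // underlying source register
      // ... use Def and Src ...
    }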
@@ -1869,8 +1875,10 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FSHR:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SCMP:
case TargetOpcode::G_UMAX:
case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UCMP:
case TargetOpcode::G_PTRMASK:
case TargetOpcode::G_SADDO:
case TargetOpcode::G_SSUBO: