Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp         |  2
-rw-r--r--  llvm/lib/Support/APFloat.cpp                          |  9
-rw-r--r--  llvm/lib/Support/Unix/Signals.inc                     |  3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ExpandImm.cpp          |  2
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td          | 87
-rw-r--r--  llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp  | 48
-rw-r--r--  llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp        |  9
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp               |  2
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp       |  3
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp       |  2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.h                 | 17
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp       |  1
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h     |  6
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp        | 15
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp     | 53
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.h       |  7
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp         | 20
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h            |  6
18 files changed, 162 insertions, 130 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d..6bf9008 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26876,6 +26876,8 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// TODO: handle more extension/truncation cases as cases arise.
if (EltSizeInBits != ExtSrcSizeInBits)
return SDValue();
+ if (VT.getSizeInBits() != N00.getValueSizeInBits())
+ return SDValue();
// We can remove *extend_vector_inreg only if the truncation happens at
// the same scale as the extension.
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index b4de79a..4787604 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -2600,8 +2600,7 @@ APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
int exponentChange = omsb - fromSemantics.precision;
if (exponent + exponentChange < toSemantics.minExponent)
exponentChange = toSemantics.minExponent - exponent;
- if (exponentChange < shift)
- exponentChange = shift;
+ exponentChange = std::max(exponentChange, shift);
if (exponentChange < 0) {
shift -= exponentChange;
exponent += exponentChange;
@@ -3043,8 +3042,7 @@ IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
if (decSig.exponent < semantics->minExponent) {
excessPrecision += (semantics->minExponent - decSig.exponent);
truncatedBits = excessPrecision;
- if (excessPrecision > calcSemantics.precision)
- excessPrecision = calcSemantics.precision;
+ excessPrecision = std::min(excessPrecision, calcSemantics.precision);
}
/* Extra half-ulp lost in reciprocal of exponent. */
powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
@@ -3441,8 +3439,7 @@ char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
/* Convert as much of "part" to hexdigits as we can. */
unsigned int curDigits = integerPartWidth / 4;
- if (curDigits > outputDigits)
- curDigits = outputDigits;
+ curDigits = std::min(curDigits, outputDigits);
dst += partAsHex (dst, part, curDigits, hexDigitChars);
outputDigits -= curDigits;
}
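
All three APFloat.cpp hunks (and the Signals.inc hunk below) apply the same cleanup: a hand-rolled compare-and-assign clamp becomes std::max or std::min from <algorithm>. A minimal standalone sketch of the equivalence, with illustrative values rather than APFloat state:

#include <algorithm>
#include <cassert>

int main() {
  int exponentChange = -5, shift = 2;

  // Old form: clamp via explicit compare-and-assign.
  int oldResult = exponentChange;
  if (oldResult < shift)
    oldResult = shift;

  // New form: the same clamp, stated directly.
  int newResult = std::max(exponentChange, shift);

  assert(oldResult == newResult && newResult == 2);
  return 0;
}
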
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 573ad82..78d6540 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -868,8 +868,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
nwidth = strlen(name) - 1;
}
- if (nwidth > width)
- width = nwidth;
+ width = std::max(nwidth, width);
}
for (int i = 0; i < depth; ++i) {
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 9801627..e9660ac1 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -585,7 +585,7 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
uint64_t ShiftedMask = (0xFFFFULL << Shift);
uint64_t ZeroChunk = UImm & ~ShiftedMask;
uint64_t OneChunk = UImm | ShiftedMask;
- uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
+ uint64_t RotatedImm = llvm::rotl(UImm, 32);
uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
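
The shift-and-OR pair being replaced is exactly a 32-bit rotate, which llvm::rotl (llvm/ADT/bit.h) spells out by name. A quick standalone check of the equivalence, written with C++20's std::rotl, which llvm::rotl mirrors:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t UImm = 0x1234567890ABCDEFULL;

  // Old form: swap the two 32-bit halves with shifts and an OR.
  uint64_t Shifted = (UImm << 32) | (UImm >> 32);

  // New form: a rotate-left by 32 bits.
  uint64_t Rotated = std::rotl(UImm, 32);

  assert(Shifted == Rotated); // both yield 0x90ABCDEF12345678
  return 0;
}
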
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index 9358486..85700ae 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -439,41 +439,8 @@ let Predicates = [HasStdExtZvfbfmin] in {
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
- defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllBF16Vectors>;
- defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
- AllBF16Vectors, uimm5>;
- defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
- eew=16, vtilist=AllBF16Vectors>;
- defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllBF16Vectors, uimm5>;
- defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllBF16Vectors, uimm5>;
-
foreach fvti = AllBF16Vectors in {
- defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM",
- fvti.Vector,
- fvti.Vector, fvti.Vector, fvti.Mask,
- fvti.Log2SEW, fvti.LMul, fvti.RegClass,
- fvti.RegClass, fvti.RegClass>;
- defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
- "V"#fvti.ScalarSuffix#"M",
- fvti.Vector,
- fvti.Vector, fvti.Scalar, fvti.Mask,
- fvti.Log2SEW, fvti.LMul, fvti.RegClass,
- fvti.RegClass, fvti.ScalarRegClass>;
- defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
- def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$passthru),
- (fvti.Vector fvti.RegClass:$rs2),
- (fvti.Scalar (fpimm0)),
- (fvti.Mask VMV0:$vm), VLOpFrag)),
- (instr fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
-
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
- fvti.AVL, fvti.Log2SEW)>;
def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
(SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
@@ -489,24 +456,6 @@ let Predicates = [HasStdExtZvfbfmin] in {
(fvti.Vector (IMPLICIT_DEF)),
fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- (fvti.Vector (IMPLICIT_DEF)),
- fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
- fvti.RegClass:$rs1,
- fvti.RegClass:$rs2,
- fvti.RegClass:$passthru,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm),
- GPR:$vl, fvti.Log2SEW)>;
-
def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
(SplatFPOp (SelectScalarFPAsInt (XLenVT GPR:$imm))),
fvti.RegClass:$rs2,
@@ -525,42 +474,6 @@ let Predicates = [HasStdExtZvfbfmin] in {
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
fvti.RegClass:$passthru, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm),
GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2,
- fvti.RegClass:$passthru,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- fvti.RegClass:$passthru, fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector
- (riscv_vrgather_vv_vl fvti.RegClass:$rs2,
- (ivti.Vector fvti.RegClass:$rs1),
- fvti.RegClass:$passthru,
- (fvti.Mask VMV0:$vm),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VV_"# fvti.LMul.MX#"_E"# fvti.SEW#"_MASK")
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, fvti.RegClass:$rs1,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (riscv_vrgather_vx_vl fvti.RegClass:$rs2, GPR:$rs1,
- fvti.RegClass:$passthru,
- (fvti.Mask VMV0:$vm),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VX_"# fvti.LMul.MX#"_MASK")
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, GPR:$rs1,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector
- (riscv_vrgather_vx_vl fvti.RegClass:$rs2,
- uimm5:$imm,
- fvti.RegClass:$passthru,
- (fvti.Mask VMV0:$vm),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VI_"# fvti.LMul.MX#"_MASK")
- fvti.RegClass:$passthru, fvti.RegClass:$rs2, uimm5:$imm,
- (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 100f1ec..53ec712 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -1879,28 +1879,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
unsigned OpCmp;
LLT Ty = MRI.getType(DstReg);
- switch (Ty.getSizeInBits()) {
- default:
- return false;
- case 8:
- OpCmp = X86::CMOV_GR8;
- break;
- case 16:
- OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
- break;
- case 32:
- OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
- break;
- case 64:
- assert(STI.is64Bit() && STI.canUseCMOV());
- OpCmp = X86::CMOV64rr;
- break;
+ if (Ty.getSizeInBits() == 80) {
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80),
+ DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg());
+ } else {
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 8:
+ OpCmp = X86::CMOV_GR8;
+ break;
+ case 16:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
+ break;
+ case 32:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
+ break;
+ case 64:
+ assert(STI.is64Bit() && STI.canUseCMOV());
+ OpCmp = X86::CMOV64rr;
+ break;
+ }
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg())
+ .addImm(X86::COND_E);
}
- BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
- .addReg(Sel.getTrueReg())
- .addReg(Sel.getFalseReg())
- .addImm(X86::COND_E);
-
const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
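
The restructuring separates the x87 case from the integer CMOV switch: CMOVE_Fp80 is a pseudo whose condition is implied by the opcode, so no COND_E immediate is appended. A hypothetical standalone sketch of the new dispatch shape (SelOp and selectCMovFor are invented names; the real code uses X86:: opcodes and also constrains the destination register class afterwards):

#include <optional>

// Stand-in opcode tags, for illustration only.
enum class SelOp { FpCMov80, CMovGR8, CMov16, CMov32, CMov64 };

std::optional<SelOp> selectCMovFor(unsigned SizeInBits) {
  // 80-bit x87 selects bypass the integer switch entirely.
  if (SizeInBits == 80)
    return SelOp::FpCMov80;
  switch (SizeInBits) {
  case 8:
    return SelOp::CMovGR8; // always the pseudo
  case 16:
    return SelOp::CMov16;  // CMOV16rr when CMOV is available, else pseudo
  case 32:
    return SelOp::CMov32;  // likewise
  case 64:
    return SelOp::CMov64;  // the real code asserts 64-bit + CMOV here
  default:
    return std::nullopt;   // unsupported width: selection fails
  }
}

int main() { return selectCMovFor(80) == SelOp::FpCMov80 ? 0 : 1; }
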
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 28fa2cd..e792b1b 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -575,10 +575,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
// todo: vectors and address spaces
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
+ .legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
+ .legalFor(!HasCMOV, {{s8, s32}})
+ .legalFor(Is64Bit, {{s64, s32}})
+ .legalFor(UseX87, {{s80, s32}})
+ .clampScalar(1, s32, s32)
.widenScalarToNextPow2(0, /*Min=*/8)
- .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
- .clampScalar(1, s32, s32);
+ .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);
// memory intrinsics
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b05d7c7..b5f8ee5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41846,7 +41846,7 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
return SDValue();
- Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
+ Imm = llvm::rotl<uint8_t>(Imm, 4);
return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
DAG.getTargetConstant(Imm, DL, MVT::i8));
};
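
The same rotate idiom at byte width: exchanging the two nibbles of the 8-bit SHUFP immediate is a rotate by 4. A standalone check (C++20's std::rotl standing in for llvm::rotl):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint8_t Imm = 0xA3;
  uint8_t Swapped = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4); // old form
  assert(Swapped == std::rotl(Imm, 4));                        // 0x3A
  return 0;
}
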
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 280eb20..febdc54 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7192,7 +7192,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
- VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
+ VPlanTransforms::runPass(VPlanTransforms::materializePacksAndUnpacks,
+ BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
bool HasBranchWeights =
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9cd52da..048a3e6 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5343,7 +5343,7 @@ private:
unsigned &OpCnt =
OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
EdgeInfo EI(TE, U.getOperandNo());
- if (!getScheduleCopyableData(EI, Op) && OpCnt < NumOps)
+ if (!getScheduleCopyableData(EI, Op))
continue;
// Found copyable operand - continue.
++OpCnt;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 84d2ea6..fed04eb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1007,6 +1007,11 @@ public:
/// Creates a fixed-width vector containing all operands. The number of
/// operands matches the vector element count.
BuildVector,
+ /// Extracts all lanes from its (non-scalable) vector operand. This is an
+ /// abstract VPInstruction whose single defined VPValue represents VF
+ /// scalars extracted from a vector, to be replaced by VF ExtractElement
+ /// VPInstructions.
+ Unpack,
/// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
/// where one of (x,y) is loop invariant, and both x and y are integer type.
ComputeAnyOfResult,
@@ -2715,6 +2720,15 @@ public:
return R && classof(R);
}
+ static inline bool classof(const VPValue *VPV) {
+ const VPRecipeBase *R = VPV->getDefiningRecipe();
+ return R && classof(R);
+ }
+
+ static inline bool classof(const VPSingleDefRecipe *R) {
+ return classof(static_cast<const VPRecipeBase *>(R));
+ }
+
/// Generate the reduction in the loop.
void execute(VPTransformState &State) override;
@@ -3100,6 +3114,9 @@ public:
/// Returns true if this expression contains recipes that may have side
/// effects.
bool mayHaveSideEffects() const;
+
+ /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
+ bool isSingleScalar() const;
};
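
Per the doc comment above, Unpack is the inverse of BuildVector: one vector value becomes VF scalar values, realized later as one ExtractElement per lane. A plain-C++ model of those semantics (arrays standing in for VPlan values; none of this is VPlan API):

#include <array>
#include <cassert>
#include <cstddef>

template <std::size_t VF>
std::array<int, VF> unpack(const std::array<int, VF> &Vec) {
  std::array<int, VF> Scalars{};
  for (std::size_t Lane = 0; Lane < VF; ++Lane)
    Scalars[Lane] = Vec[Lane]; // models ExtractElement(Vec, Lane)
  return Scalars;
}

int main() {
  std::array<int, 4> Packed{10, 11, 12, 13}; // models BuildVector(10..13)
  auto Lanes = unpack(Packed);
  for (std::size_t L = 0; L < 4; ++L)
    assert(Lanes[L] == Packed[L]); // unpack recovers each packed lane
  return 0;
}
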
/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 7e074c1..80a2e4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -110,6 +110,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::AnyOf:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
+ case VPInstruction::Unpack:
return SetResultTyFromOp();
case VPInstruction::ExtractLane:
return inferScalarType(R->getOperand(1));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index d8203e2..b5b98c6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -388,6 +388,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
}
+template <typename Op0_t, typename Op1_t>
+inline VPInstruction_match<Instruction::ExtractElement, Op0_t, Op1_t>
+m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1) {
+ return m_VPInstruction<Instruction::ExtractElement>(Op0, Op1);
+}
+
template <typename Op0_t>
inline VPInstruction_match<VPInstruction::ExtractLastLanePerPart, Op0_t>
m_ExtractLastLanePerPart(const Op0_t &Op0) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index d1e67e6b..a865b2d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -515,6 +515,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
case VPInstruction::Not:
+ case VPInstruction::Unpack:
return 1;
case Instruction::ICmp:
case Instruction::FCmp:
@@ -1246,6 +1247,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::StepVector:
case VPInstruction::ReductionStartVector:
case VPInstruction::VScale:
+ case VPInstruction::Unpack:
return false;
default:
return true;
@@ -1290,7 +1292,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::WidePtrAdd:
- return Op == getOperand(0);
+ // WidePtrAdd supports scalar and vector base addresses.
+ return false;
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
@@ -1417,6 +1420,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ResumeForEpilogue:
O << "resume-for-epilogue";
break;
+ case VPInstruction::Unpack:
+ O << "unpack";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -2888,6 +2894,13 @@ bool VPExpressionRecipe::mayHaveSideEffects() const {
return false;
}
+bool VPExpressionRecipe::isSingleScalar() const {
+ // Cannot use vputils::isSingleScalar(), because all external operands
+ // of the expression will be live-ins while bundled.
+ return isa<VPReductionRecipe>(ExpressionRecipes.back()) &&
+ !isa<VPPartialReductionRecipe>(ExpressionRecipes.back());
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f5f616f..fa1fdaf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1224,6 +1224,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
return;
}
+ uint64_t Idx;
+ if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
+ auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
+ Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
+ return;
+ }
+
if (auto *Phi = dyn_cast<VPPhi>(Def)) {
if (Phi->getNumOperands() == 1)
Phi->replaceAllUsesWith(Phi->getOperand(0));
@@ -3780,7 +3787,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
BTC->replaceAllUsesWith(TCMO);
}
-void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
+void VPlanTransforms::materializePacksAndUnpacks(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;
@@ -3828,6 +3835,50 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
});
}
}
+
+ // Create explicit VPInstructions to convert vectors to scalars. The current
+ // implementation is conservative - it may miss some cases that may or may not
+ // be vector values. TODO: introduce Unpacks speculatively - remove them later
+ // if they are known to operate on scalar values.
+ for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ if (isa<VPReplicateRecipe, VPInstruction, VPScalarIVStepsRecipe,
+ VPDerivedIVRecipe, VPCanonicalIVPHIRecipe>(&R))
+ continue;
+ for (VPValue *Def : R.definedValues()) {
+ // Skip recipes that are single-scalar or only have their first lane
+ // used.
+ // TODO: The Defs skipped here may or may not be vector values.
+ // Introduce Unpacks, and remove them later, if they are guaranteed to
+ // produce scalar values.
+ if (vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def))
+ continue;
+
+ // At the moment, we create unpacks only for scalar users outside
+ // replicate regions. Recipes inside replicate regions still extract the
+ // required lanes implicitly.
+ // TODO: Remove once replicate regions are unrolled completely.
+ auto IsCandidateUnpackUser = [Def](VPUser *U) {
+ VPRegionBlock *ParentRegion =
+ cast<VPRecipeBase>(U)->getParent()->getParent();
+ return U->usesScalars(Def) &&
+ (!ParentRegion || !ParentRegion->isReplicator());
+ };
+ if (none_of(Def->users(), IsCandidateUnpackUser))
+ continue;
+
+ auto *Unpack = new VPInstruction(VPInstruction::Unpack, {Def});
+ if (R.isPhi())
+ Unpack->insertBefore(*VPBB, VPBB->getFirstNonPhi());
+ else
+ Unpack->insertAfter(&R);
+ Def->replaceUsesWithIf(Unpack,
+ [&IsCandidateUnpackUser](VPUser &U, unsigned) {
+ return IsCandidateUnpackUser(&U);
+ });
+ }
+ }
+ }
}
void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
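
The new simplifyRecipe fold above removes a round-trip through a vector: extracting lane Idx from a BuildVector is just the BuildVector's Idx-th scalar operand. A plain-C++ model of that fold (the struct and function names are invented for illustration):

#include <cassert>
#include <cstdint>
#include <vector>

// Model: a BuildVector is just its list of scalar operands.
struct BuildVectorModel {
  std::vector<int> Operands;
};

// extractelement(buildvector(a0..aN-1), Idx) folds to aIdx.
int foldExtractOfBuildVector(const BuildVectorModel &BV, uint64_t Idx) {
  assert(Idx < BV.Operands.size() && "lane index must be in range");
  return BV.Operands[Idx];
}

int main() {
  BuildVectorModel BV{{4, 8, 15, 16}};
  assert(foldExtractOfBuildVector(BV, 2) == 15);
  return 0;
}
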
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5a8a2bb..b28559b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -325,9 +325,10 @@ struct VPlanTransforms {
static void materializeBackedgeTakenCount(VPlan &Plan,
VPBasicBlock *VectorPH);
- /// Add explicit Build[Struct]Vector recipes that combine multiple scalar
- /// values into single vectors.
- static void materializeBuildVectors(VPlan &Plan);
+ /// Add explicit Build[Struct]Vector recipes to Pack multiple scalar values
+ /// into vectors and Unpack recipes to extract scalars from vectors as
+ /// needed.
+ static void materializePacksAndUnpacks(VPlan &Plan);
/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 5aeda3e..cfd1a74 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -465,10 +465,21 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar
definitions for operands of \p DefR.
-static VPRecipeWithIRFlags *
+static VPValue *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPRecipeWithIRFlags *DefR, VPLane Lane,
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
+ VPValue *Op;
+ if (match(DefR, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)))) {
+ auto LaneDefs = Def2LaneDefs.find(Op);
+ if (LaneDefs != Def2LaneDefs.end())
+ return LaneDefs->second[Lane.getKnownLane()];
+
+ VPValue *Idx =
+ Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane()));
+ return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx});
+ }
+
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : DefR->operands()) {
@@ -480,6 +491,10 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
continue;
}
if (Lane.getKind() == VPLane::Kind::ScalableLast) {
+ // Look through mandatory Unpack.
+ [[maybe_unused]] bool Matched =
+ match(Op, m_VPInstruction<VPInstruction::Unpack>(m_VPValue(Op)));
+ assert(Matched && "original op must have been Unpack");
NewOps.push_back(
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
@@ -547,7 +562,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
(isa<VPReplicateRecipe>(&R) &&
cast<VPReplicateRecipe>(&R)->isSingleScalar()) ||
(isa<VPInstruction>(&R) &&
- !cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
+ !cast<VPInstruction>(&R)->doesGeneratePerAllLanes() &&
+ cast<VPInstruction>(&R)->getOpcode() != VPInstruction::Unpack))
continue;
auto *DefR = cast<VPRecipeWithIRFlags>(&R);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 9a2497e..840a5b9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -84,6 +84,12 @@ inline bool isSingleScalar(const VPValue *VPV) {
return VPI->isSingleScalar() || VPI->isVectorToScalar() ||
(PreservesUniformity(VPI->getOpcode()) &&
all_of(VPI->operands(), isSingleScalar));
+ if (isa<VPPartialReductionRecipe>(VPV))
+ return false;
+ if (isa<VPReductionRecipe>(VPV))
+ return true;
+ if (auto *Expr = dyn_cast<VPExpressionRecipe>(VPV))
+ return Expr->isSingleScalar();
// VPExpandSCEVRecipes must be placed in the entry and are always uniform.
return isa<VPExpandSCEVRecipe>(VPV);