Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp          | 12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td            | 14
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoD.td           |  4
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td         | 46
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td        |  8
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td         | 11
-rw-r--r--  llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 82
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp    | 19
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlan.cpp            | 12
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp     |  9
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp  | 12
11 files changed, 144 insertions, 76 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index aa078f3..e40fb76 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -704,9 +704,17 @@ void DwarfUnit::addType(DIE &Entity, const DIType *Ty,
addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty)));
}
+// FIXME: change callsites to use the new DW_LNAME_ language codes.
llvm::dwarf::SourceLanguage DwarfUnit::getSourceLanguage() const {
- return static_cast<llvm::dwarf::SourceLanguage>(
- getLanguage().getUnversionedName());
+ const auto &Lang = getLanguage();
+
+ if (!Lang.hasVersionedName())
+ return static_cast<llvm::dwarf::SourceLanguage>(Lang.getName());
+
+ return llvm::dwarf::toDW_LANG(
+ static_cast<llvm::dwarf::SourceLanguageName>(Lang.getName()),
+ Lang.getVersion())
+ .value_or(llvm::dwarf::DW_LANG_hi_user);
}
std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
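The new path distinguishes plain DW_LANG_* codes from the versioned DW_LNAME_* scheme and folds the latter back to a DWARF 5 code where one exists, falling back to DW_LANG_hi_user otherwise. A minimal sketch of those two paths follows; the wrapper name resolveLanguage and the concrete C++14 mapping mentioned in the comment are illustrative assumptions, not part of this patch.

#include "llvm/BinaryFormat/Dwarf.h"
using namespace llvm::dwarf;

// Sketch: resolve either an unversioned DW_LANG_* code or a versioned
// (DW_LNAME_*, version) pair, mirroring what getSourceLanguage now does.
SourceLanguage resolveLanguage(unsigned Name, uint32_t Version, bool Versioned) {
  if (!Versioned)
    return static_cast<SourceLanguage>(Name);   // already a DW_LANG_* code
  // e.g. (DW_LNAME_C_plus_plus, 201402) is expected to fold to DW_LANG_C_plus_plus_14.
  return toDW_LANG(static_cast<SourceLanguageName>(Name), Version)
      .value_or(DW_LANG_hi_user);               // no DWARF 5 equivalent
}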
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b9e01c3..66717b9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1367,9 +1367,8 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, (${rs1})",
class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
: Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>;
-class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt1 = XLenVT,
- ValueType vt2 = XLenVT>
- : Pat<(vt1 (OpNode (vt1 GPR:$rs1), (vt2 GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
+ : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType,
ValueType vt = XLenVT>
@@ -1973,8 +1972,9 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs
/// Loads
-class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT>
- : Pat<(vt (LoadOp (AddrRegImm (XLenVT GPRMem:$rs1), simm12_lo:$imm12))),
+class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT,
+ ValueType PtrVT = XLenVT>
+ : Pat<(vt (LoadOp (AddrRegImm (PtrVT GPRMem:$rs1), simm12_lo:$imm12))),
(Inst GPRMem:$rs1, simm12_lo:$imm12)>;
def : LdPat<sextloadi8, LB>;
@@ -1988,8 +1988,8 @@ def : LdPat<zextloadi16, LHU>;
/// Stores
class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
- ValueType vt>
- : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (XLenVT GPRMem:$rs1),
+ ValueType vt, ValueType PtrVT = XLenVT>
+ : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (PtrVT GPRMem:$rs1),
simm12_lo:$imm12)),
(Inst StTy:$rs2, GPRMem:$rs1, simm12_lo:$imm12)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 65e7e3b..afac37d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -544,8 +544,8 @@ def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12_l
} // Predicates = [HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtZdinx, HasStdExtZilsd, IsRV32] in {
-def : LdPat<load, LD_RV32, f64>;
-def : StPat<store, SD_RV32, GPRPair, f64>;
+def : LdPat<load, LD_RV32, f64, i32>;
+def : StPat<store, SD_RV32, GPRPair, f64, i32>;
}
let Predicates = [HasStdExtD, IsRV32] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index d8f5d3e..aa8f1a1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -669,19 +669,19 @@ let Predicates = [HasVendorXCValu, IsRV32] in {
// Patterns for load & store operations
//===----------------------------------------------------------------------===//
class CVLdrrPat<PatFrag LoadOp, RVInst Inst>
- : Pat<(XLenVT (LoadOp CVrr:$regreg)),
+ : Pat<(i32 (LoadOp CVrr:$regreg)),
(Inst CVrr:$regreg)>;
class CVStriPat<PatFrag StoreOp, RVInst Inst>
- : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, simm12_lo:$imm12),
+ : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, simm12_lo:$imm12),
(Inst GPR:$rs2, GPR:$rs1, simm12_lo:$imm12)>;
class CVStrriPat<PatFrag StoreOp, RVInst Inst>
- : Pat<(StoreOp (XLenVT GPR:$rs2), GPR:$rs1, GPR:$rs3),
+ : Pat<(StoreOp (i32 GPR:$rs2), GPR:$rs1, GPR:$rs3),
(Inst GPR:$rs2, GPR:$rs1, GPR:$rs3)>;
class CVStrrPat<PatFrag StoreOp, RVInst Inst>
- : Pat<(StoreOp (XLenVT GPR:$rs2), CVrr:$regreg),
+ : Pat<(StoreOp (i32 GPR:$rs2), CVrr:$regreg),
(Inst GPR:$rs2, CVrr:$regreg)>;
let Predicates = [HasVendorXCVmem, IsRV32], AddedComplexity = 1 in {
@@ -725,17 +725,17 @@ let Predicates = [HasVendorXCVbitmanip, IsRV32] in {
(CV_INSERT GPR:$rd, GPR:$rs1, (CV_HI5 cv_uimm10:$imm),
(CV_LO5 cv_uimm10:$imm))>;
- def : PatGpr<cttz, CV_FF1>;
- def : PatGpr<ctlz, CV_FL1>;
+ def : PatGpr<cttz, CV_FF1, i32>;
+ def : PatGpr<ctlz, CV_FL1, i32>;
def : PatGpr<int_riscv_cv_bitmanip_clb, CV_CLB>;
- def : PatGpr<ctpop, CV_CNT>;
+ def : PatGpr<ctpop, CV_CNT, i32>;
- def : PatGprGpr<rotr, CV_ROR>;
+ def : PatGprGpr<rotr, CV_ROR, i32>;
def : Pat<(int_riscv_cv_bitmanip_bitrev GPR:$rs1, cv_tuimm5:$pts,
cv_tuimm2:$radix),
(CV_BITREV GPR:$rs1, cv_tuimm2:$radix, cv_tuimm5:$pts)>;
- def : Pat<(bitreverse (XLenVT GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
+ def : Pat<(bitreverse (i32 GPR:$rs)), (CV_BITREV GPR:$rs, 0, 0)>;
}
class PatCoreVAluGpr<string intr, string asm> :
@@ -760,18 +760,18 @@ multiclass PatCoreVAluGprGprImm<Intrinsic intr> {
}
let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
- def : PatGpr<abs, CV_ABS>;
- def : PatGprGpr<setle, CV_SLE>;
- def : PatGprGpr<setule, CV_SLEU>;
- def : PatGprGpr<smin, CV_MIN>;
- def : PatGprGpr<umin, CV_MINU>;
- def : PatGprGpr<smax, CV_MAX>;
- def : PatGprGpr<umax, CV_MAXU>;
-
- def : Pat<(sext_inreg (XLenVT GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>;
- def : Pat<(sext_inreg (XLenVT GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>;
- def : Pat<(and (XLenVT GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>;
- def : Pat<(and (XLenVT GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>;
+ def : PatGpr<abs, CV_ABS, i32>;
+ def : PatGprGpr<setle, CV_SLE, i32>;
+ def : PatGprGpr<setule, CV_SLEU, i32>;
+ def : PatGprGpr<smin, CV_MIN, i32>;
+ def : PatGprGpr<umin, CV_MINU, i32>;
+ def : PatGprGpr<smax, CV_MAX, i32>;
+ def : PatGprGpr<umax, CV_MAXU, i32>;
+
+ def : Pat<(sext_inreg (i32 GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>;
+ def : Pat<(sext_inreg (i32 GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>;
+ def : Pat<(and (i32 GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>;
+ def : Pat<(and (i32 GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>;
defm CLIP : PatCoreVAluGprImm<int_riscv_cv_alu_clip>;
defm CLIPU : PatCoreVAluGprImm<int_riscv_cv_alu_clipu>;
@@ -790,9 +790,9 @@ let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
//===----------------------------------------------------------------------===//
let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in {
- def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETEQ, bb:$imm12),
+ def : Pat<(riscv_brcc (i32 GPR:$rs1), simm5:$imm5, SETEQ, bb:$imm12),
(CV_BEQIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>;
- def : Pat<(riscv_brcc GPR:$rs1, simm5:$imm5, SETNE, bb:$imm12),
+ def : Pat<(riscv_brcc (i32 GPR:$rs1), simm5:$imm5, SETNE, bb:$imm12),
(CV_BNEIMM GPR:$rs1, simm5:$imm5, bare_simm13_lsb0_bb:$imm12)>;
defm CC_SImm5_CV : SelectCC_GPR_riirr<GPR, simm5>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 5e1d07a..4537bfe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -1648,10 +1648,10 @@ def : Pat<(qc_setwmi (i32 GPR:$rs3), GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb0
} // Predicates = [HasVendorXqcilsm, IsRV32]
let Predicates = [HasVendorXqcili, IsRV32] in {
-def: Pat<(qc_e_li tglobaladdr:$A), (QC_E_LI bare_simm32:$A)>;
-def: Pat<(qc_e_li tblockaddress:$A), (QC_E_LI bare_simm32:$A)>;
-def: Pat<(qc_e_li tjumptable:$A), (QC_E_LI bare_simm32:$A)>;
-def: Pat<(qc_e_li tconstpool:$A), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tglobaladdr:$A)), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tblockaddress:$A)), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tjumptable:$A)), (QC_E_LI bare_simm32:$A)>;
+def: Pat<(i32 (qc_e_li tconstpool:$A)), (QC_E_LI bare_simm32:$A)>;
} // Predicates = [HasVendorXqcili, IsRV32]
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 52a2b29..c31713e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -607,13 +607,16 @@ def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs
let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
-def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
-def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1, FRM_RNE)>;
+def : Pat<(any_fpround FPR64IN32X:$rs1),
+ (FCVT_H_D_IN32X FPR64IN32X:$rs1, (i32 FRM_DYN))>;
+def : Pat<(any_fpextend FPR16INX:$rs1),
+ (FCVT_D_H_IN32X FPR16INX:$rs1, (i32 FRM_RNE))>;
/// Float arithmetic operations
def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2),
- (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>;
-def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, FRM_RNE))>;
+ (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, (i32 FRM_DYN)))>;
+def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2),
+ (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2, (i32 FRM_RNE)))>;
} // Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV32]
let Predicates = [HasStdExtZhinxmin, HasStdExtZdinx, IsRV64] in {
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 989950f..a466ab2 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -316,6 +316,9 @@ private:
bool selectImageWriteIntrinsic(MachineInstr &I) const;
bool selectResourceGetPointer(Register &ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectResourceNonUniformIndex(Register &ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const;
bool selectModf(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
bool selectUpdateCounter(Register &ResVReg, const SPIRVType *ResType,
@@ -347,7 +350,7 @@ private:
SPIRV::StorageClass::StorageClass SC,
uint32_t Set, uint32_t Binding,
uint32_t ArraySize, Register IndexReg,
- bool IsNonUniform, StringRef Name,
+ StringRef Name,
MachineIRBuilder MIRBuilder) const;
SPIRVType *widenTypeToVec4(const SPIRVType *Type, MachineInstr &I) const;
bool extractSubvector(Register &ResVReg, const SPIRVType *ResType,
@@ -364,6 +367,7 @@ private:
MachineInstr &I) const;
bool loadHandleBeforePosition(Register &HandleReg, const SPIRVType *ResType,
GIntrinsic &HandleDef, MachineInstr &Pos) const;
+ void decorateUsesAsNonUniform(Register &NonUniformReg) const;
};
bool sampledTypeIsSignedInteger(const llvm::Type *HandleType) {
@@ -3465,6 +3469,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_discard: {
return selectDiscard(ResVReg, ResType, I);
}
+ case Intrinsic::spv_resource_nonuniformindex: {
+ return selectResourceNonUniformIndex(ResVReg, ResType, I);
+ }
default: {
std::string DiagMsg;
raw_string_ostream OS(DiagMsg);
@@ -3504,7 +3511,6 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding(
uint32_t Binding = getIConstVal(Intr.getOperand(3).getReg(), MRI);
uint32_t ArraySize = getIConstVal(MainHandleDef->getOperand(4).getReg(), MRI);
Register IndexReg = MainHandleDef->getOperand(5).getReg();
- const bool IsNonUniform = false;
std::string CounterName =
getStringValueFromReg(MainHandleDef->getOperand(6).getReg(), *MRI) +
".counter";
@@ -3513,7 +3519,7 @@ bool SPIRVInstructionSelector::selectCounterHandleFromBinding(
MachineIRBuilder MIRBuilder(I);
Register CounterVarReg = buildPointerToResource(
GR.getPointeeType(ResType), GR.getPointerStorageClass(ResType), Set,
- Binding, ArraySize, IndexReg, IsNonUniform, CounterName, MIRBuilder);
+ Binding, ArraySize, IndexReg, CounterName, MIRBuilder);
return BuildCOPY(ResVReg, CounterVarReg, I);
}
@@ -3713,6 +3719,55 @@ bool SPIRVInstructionSelector::selectResourceGetPointer(
.constrainAllUses(TII, TRI, RBI);
}
+bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
+ Register &ResVReg, const SPIRVType *ResType, MachineInstr &I) const {
+ Register ObjReg = I.getOperand(2).getReg();
+ if (!BuildCOPY(ResVReg, ObjReg, I))
+ return false;
+
+ buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {});
+ // Check for the registers that use the index marked as non-uniform
+ // and recursively mark them as non-uniform.
+ // Per the spec, it's necessary that the final argument used for
+ // load/store/sample/atomic must be decorated, so we need to propagate the
+ // decoration through access chains and copies.
+ // https://docs.vulkan.org/samples/latest/samples/extensions/descriptor_indexing/README.html#_when_to_use_non_uniform_indexing_qualifier
+ decorateUsesAsNonUniform(ResVReg);
+ return true;
+}
+
+void SPIRVInstructionSelector::decorateUsesAsNonUniform(
+ Register &NonUniformReg) const {
+ llvm::SmallVector<Register> WorkList = {NonUniformReg};
+ while (WorkList.size() > 0) {
+ Register CurrentReg = WorkList.back();
+ WorkList.pop_back();
+
+ bool IsDecorated = false;
+ for (MachineInstr &Use : MRI->use_instructions(CurrentReg)) {
+ if (Use.getOpcode() == SPIRV::OpDecorate &&
+ Use.getOperand(1).getImm() == SPIRV::Decoration::NonUniformEXT) {
+ IsDecorated = true;
+ continue;
+ }
+ // Check if the instruction has the result register and add it to the
+ // worklist.
+ if (Use.getOperand(0).isReg() && Use.getOperand(0).isDef()) {
+ Register ResultReg = Use.getOperand(0).getReg();
+ if (ResultReg == CurrentReg)
+ continue;
+ WorkList.push_back(ResultReg);
+ }
+ }
+
+ if (!IsDecorated) {
+ buildOpDecorate(CurrentReg, *MRI->getVRegDef(CurrentReg), TII,
+ SPIRV::Decoration::NonUniformEXT, {});
+ }
+ }
+ return;
+}
+
bool SPIRVInstructionSelector::extractSubvector(
Register &ResVReg, const SPIRVType *ResType, Register &ReadReg,
MachineInstr &InsertionPoint) const {
@@ -3784,7 +3839,7 @@ bool SPIRVInstructionSelector::selectImageWriteIntrinsic(
Register SPIRVInstructionSelector::buildPointerToResource(
const SPIRVType *SpirvResType, SPIRV::StorageClass::StorageClass SC,
uint32_t Set, uint32_t Binding, uint32_t ArraySize, Register IndexReg,
- bool IsNonUniform, StringRef Name, MachineIRBuilder MIRBuilder) const {
+ StringRef Name, MachineIRBuilder MIRBuilder) const {
const Type *ResType = GR.getTypeForSPIRVType(SpirvResType);
if (ArraySize == 1) {
SPIRVType *PtrType =
@@ -3803,14 +3858,7 @@ Register SPIRVInstructionSelector::buildPointerToResource(
SPIRVType *ResPointerType =
GR.getOrCreateSPIRVPointerType(ResType, MIRBuilder, SC);
-
Register AcReg = MRI->createVirtualRegister(GR.getRegClass(ResPointerType));
- if (IsNonUniform) {
- // It is unclear which value needs to be marked an non-uniform, so both
- // the index and the access changed are decorated as non-uniform.
- buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
- buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {});
- }
MIRBuilder.buildInstr(SPIRV::OpAccessChain)
.addDef(AcReg)
@@ -4560,9 +4608,6 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
uint32_t Binding = foldImm(HandleDef.getOperand(3), MRI);
uint32_t ArraySize = foldImm(HandleDef.getOperand(4), MRI);
Register IndexReg = HandleDef.getOperand(5).getReg();
- // FIXME: The IsNonUniform flag needs to be set based on resource analysis.
- // https://github.com/llvm/llvm-project/issues/155701
- bool IsNonUniform = false;
std::string Name =
getStringValueFromReg(HandleDef.getOperand(6).getReg(), *MRI);
@@ -4576,13 +4621,8 @@ bool SPIRVInstructionSelector::loadHandleBeforePosition(
SC = GR.getPointerStorageClass(ResType);
}
- Register VarReg =
- buildPointerToResource(VarType, SC, Set, Binding, ArraySize, IndexReg,
- IsNonUniform, Name, MIRBuilder);
-
- if (IsNonUniform)
- buildOpDecorate(HandleReg, HandleDef, TII, SPIRV::Decoration::NonUniformEXT,
- {});
+ Register VarReg = buildPointerToResource(VarType, SC, Set, Binding, ArraySize,
+ IndexReg, Name, MIRBuilder);
// The handle for the buffer is the pointer to the resource. For an image, the
// handle is the image object. So images get an extra load.
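selectResourceNonUniformIndex decorates the index result and then walks def-use chains so every value derived from it (access chains, copies, the operand finally fed to a load/store/sample/atomic) also carries NonUniformEXT. A standalone sketch of that worklist propagation, with a visited set standing in for the OpDecorate check; the toy Value type and propagateNonUniform name are assumptions, not the backend's API.

#include <unordered_map>
#include <unordered_set>
#include <vector>

using Value = int;

// Sketch: mark Root non-uniform and propagate the mark to every result
// transitively computed from it, decorating each value only once.
void propagateNonUniform(Value Root,
                         const std::unordered_map<Value, std::vector<Value>> &Results,
                         std::unordered_set<Value> &Decorated) {
  std::vector<Value> WorkList = {Root};
  while (!WorkList.empty()) {
    Value Cur = WorkList.back();
    WorkList.pop_back();
    if (!Decorated.insert(Cur).second)
      continue;                         // already decorated; stops revisits
    auto It = Results.find(Cur);
    if (It == Results.end())
      continue;
    for (Value Derived : It->second)    // results of instructions that use Cur
      WorkList.push_back(Derived);
  }
}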
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e62d57e..50136a8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9348,13 +9348,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
- // Ensure that the start values for all header phi recipes are updated before
- // vectorizing the epilogue loop.
VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
- // When vectorizing the epilogue loop, the canonical induction start
- // value needs to be changed from zero to the value after the main
- // vector loop. Find the resume value created during execution of the main
- // VPlan. It must be the first phi in the loop preheader.
+ // When vectorizing the epilogue loop, the canonical induction needs to be
+ // adjusted by the value after the main vector loop. Find the resume value
+ // created during execution of the main VPlan. It must be the first phi in the
+ // loop preheader. Use the value to increment the canonical IV, and update all
+ // users in the loop region to use the adjusted value.
// FIXME: Improve modeling for canonical IV start values in the epilogue
// loop.
using namespace llvm::PatternMatch;
@@ -9389,10 +9388,16 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+ IV->replaceAllUsesWith(Add);
+ Add->setOperand(0, IV);
DenseMap<Value *, Value *> ToFrozen;
SmallVector<Instruction *> InstsToMove;
+ // Ensure that the start values for all header phi recipes are updated before
+ // vectorizing the epilogue loop. Skip the canonical IV, which has been
+ // handled above.
for (VPRecipeBase &R : drop_begin(Header->phis())) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
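Rather than overwriting the canonical IV's start value, the epilogue plan now keeps the IV counting from zero and inserts an add of the main loop's resume value, which the in-loop users consume. A scalar illustration of the resulting index math, using a made-up saxpy-style loop and vector factors purely for the sketch.

#include <cstddef>
#include <vector>

// Sketch: the epilogue loop's canonical IV starts at 0; an add of the main
// loop's resume value produces the index the loop body actually uses.
void saxpyTail(std::vector<float> &Y, const std::vector<float> &X, float A) {
  const size_t N = Y.size();
  const size_t VFMain = 8, VFEpi = 4;
  const size_t Resume = (N / VFMain) * VFMain;     // handled by the main vector loop
  const size_t EpiTrip = ((N - Resume) / VFEpi) * VFEpi;
  // (main vector loop over [0, Resume) elided)
  for (size_t IV = 0; IV < EpiTrip; IV += VFEpi) { // canonical IV of the epilogue loop
    size_t Adjusted = IV + Resume;                 // the Add inserted in the header
    for (size_t L = 0; L < VFEpi; ++L)             // stands in for one vector operation
      Y[Adjusted + L] += A * X[Adjusted + L];
  }
  for (size_t I = Resume + EpiTrip; I < N; ++I)    // scalar remainder
    Y[I] += A * X[I];
}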
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2555ebe..1fea068 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1777,6 +1777,9 @@ InstructionCost VPCostContext::getScalarizationOverhead(
if (VF.isScalar())
return 0;
+ assert(!VF.isScalable() &&
+ "Scalarization overhead not supported for scalable vectors");
+
InstructionCost ScalarizationCost = 0;
// Compute the cost of scalarizing the result if needed.
if (!ResultTy->isVoidTy()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 600ff8a..8e916772 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3174,6 +3174,9 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
// transform, avoid computing their cost multiple times for now.
Ctx.SkipCostComputation.insert(UI);
+ if (VF.isScalable() && !isSingleScalar())
+ return InstructionCost::getInvalid();
+
switch (UI->getOpcode()) {
case Instruction::GetElementPtr:
// We mark this instruction as zero-cost because the cost of GEPs in
@@ -3221,9 +3224,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
return ScalarCallCost;
}
- if (VF.isScalable())
- return InstructionCost::getInvalid();
-
return ScalarCallCost * VF.getFixedValue() +
Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
}
@@ -3274,9 +3274,6 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
}
case Instruction::Load:
case Instruction::Store: {
- if (VF.isScalable() && !isSingleScalar())
- return InstructionCost::getInvalid();
-
// TODO: See getMemInstScalarizationCost for how to handle replicating and
// predicated cases.
const VPRegionBlock *ParentRegion = getParent()->getParent();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c8a2d84..7563cd7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1234,6 +1234,18 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
if (!Plan->isUnrolled())
return;
+ // Hoist an invariant increment Y of a phi X, by having X start at Y.
+ if (match(Def, m_c_Add(m_VPValue(X), m_VPValue(Y))) && Y->isLiveIn() &&
+ isa<VPPhi>(X)) {
+ auto *Phi = cast<VPPhi>(X);
+ if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&
+ Phi->getNumUsers() == 1 && (*Phi->user_begin() == &R)) {
+ Phi->setOperand(0, Y);
+ Def->replaceAllUsesWith(Phi);
+ return;
+ }
+ }
+
// VPVectorPointer for part 0 can be replaced by their start pointer.
if (auto *VecPtr = dyn_cast<VPVectorPointerRecipe>(&R)) {
if (VecPtr->isFirstPart()) {