Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 19
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 21
-rw-r--r--  llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 28
-rw-r--r--  llvm/lib/Object/ELF.cpp | 26
-rw-r--r--  llvm/lib/ObjectYAML/ELFEmitter.cpp | 18
-rw-r--r--  llvm/lib/ObjectYAML/ELFYAML.cpp | 4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp | 6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp | 8
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp | 6
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td | 1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td | 1
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 21
-rw-r--r--  llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 33
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 5
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 82
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 10
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 17
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 4
-rw-r--r--  llvm/lib/TargetParser/TargetDataLayout.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 5
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 23
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 50
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanUtils.h | 3
31 files changed, 372 insertions, 72 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8aa488f..f65d88a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1443,7 +1443,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
MF.hasBBSections() && NumMBBSectionRanges > 1,
// Use static_cast to avoid breakage of tests on windows.
static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls,
- static_cast<bool>(EmitBBHash)};
+ static_cast<bool>(EmitBBHash), false};
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index fbcd614..485b44ae 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -287,6 +287,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
+ case 'h': { // Basic block hash specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ for (auto BBIDHashStr : Values) {
+ auto [BBIDStr, HashStr] = BBIDHashStr.split(':');
+ unsigned long long BBID = 0, Hash = 0;
+ if (getAsUnsignedInteger(BBIDStr, 10, BBID))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ BBIDStr + "'");
+ if (getAsUnsignedInteger(HashStr, 16, Hash))
+ return createProfileParseError(
+ Twine("unsigned integer expected in hex format: '") + HashStr +
+ "'");
+ FI->second.BBHashes[BBID] = Hash;
+ }
+ continue;
+ }
default:
return createProfileParseError(Twine("invalid specifier: '") +
Twine(Specifier) + "'");
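
The new 'h' specifier extends the v1 profile format: each value is a BBID:Hash pair, with the ID parsed as decimal and the hash as hex. A standalone C++ sketch of the same parse (the sample line "h 1:a1b2c3 2:ff00" is an assumed example inferred from the parser, not taken from the patch):

#include <cstdio>
#include <map>

// Parse "BBID:Hash" pairs the way the 'h' specifier expects:
// BBID in base 10, Hash in base 16.
int main() {
  std::map<unsigned long long, unsigned long long> BBHashes;
  const char *Pairs[] = {"1:a1b2c3", "2:ff00"};
  for (const char *P : Pairs) {
    unsigned long long BBID, Hash;
    if (sscanf(P, "%llu:%llx", &BBID, &Hash) == 2)
      BBHashes[BBID] = Hash;
  }
  printf("hash of BB 1 = %llx\n", BBHashes[1]); // prints a1b2c3
}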
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index ca82857..5fab6ec 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1893,6 +1893,8 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
case TargetOpcode::G_UADDSAT:
case TargetOpcode::G_SSUBSAT:
case TargetOpcode::G_USUBSAT:
+ case TargetOpcode::G_SBFX:
+ case TargetOpcode::G_UBFX:
return false;
case TargetOpcode::G_SSHLSAT:
case TargetOpcode::G_USHLSAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1ef5dc2..893556b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2715,6 +2715,12 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
AddToWorklist(Add.getNode());
+ // We can't set InBounds even if both original ptradds were InBounds and
+ // NUW: SDAG usually represents pointers as integers; therefore, the
+ // matched pattern behaves as if it had implicit casts:
+ // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
+ // The outer inbounds ptradd might therefore rely on a provenance that x
+ // does not have.
return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
}
}
@@ -2740,6 +2746,12 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
// that.
SDNodeFlags Flags =
(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+ // We can't set InBounds even if both original ptradds were InBounds and
+ // NUW: SDAG usually represents pointers as integers; therefore, the
+ // matched pattern behaves as if it had implicit casts:
+ // (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
+ // The outer inbounds ptradd might therefore rely on a provenance that
+ // GA does not have.
SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
AddToWorklist(Inner.getNode());
return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
@@ -2763,8 +2775,13 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);
// If both additions in the original were NUW, reassociation preserves that.
- SDNodeFlags ReassocFlags =
- (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+ SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
+ SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
+ if (CommonFlags.hasNoUnsignedWrap()) {
+ // If both operations are NUW and the PTRADD is inbounds, the offsets are
+ // both non-negative, so the reassociated PTRADDs are also inbounds.
+ ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
+ }
if (ZIsConstant != YIsConstant) {
if (YIsConstant)
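
The rule both hunks implement: NUW survives reassociation only if both adds carried it, and only then may InBounds from the outer ptradd be kept, since NUW offsets are provably non-negative. A minimal model of that decision, using an illustrative flag enum rather than LLVM's SDNodeFlags:

#include <cassert>
#include <cstdint>

enum : uint8_t { NUW = 1 << 0, InBounds = 1 << 1 }; // illustrative only

// Flags for the reassociated ptradds, given the two original nodes' flags.
uint8_t reassocFlags(uint8_t Outer, uint8_t Inner) {
  uint8_t Result = (Outer & Inner) & NUW; // NUW must hold on both adds
  if (Result & NUW)                       // offsets provably non-negative,
    Result |= Outer & InBounds;           // so inbounds may be preserved
  return Result;
}

int main() {
  assert(reassocFlags(NUW | InBounds, NUW) == (NUW | InBounds));
  assert(reassocFlags(InBounds, NUW) == 0); // no common NUW: drop both
}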
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index db5cc37..6c78ef0 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -129,6 +129,25 @@ prettyLanguageVersionString(const DWARFAttribute &AttrValue,
static_cast<SourceLanguageName>(*LName), *LVersion);
}
+static llvm::Expected<llvm::StringRef>
+getApplePropertyName(const DWARFDie &PropDIE) {
+ if (!PropDIE)
+ return llvm::createStringError("invalid DIE");
+
+ if (PropDIE.getTag() != DW_TAG_APPLE_property)
+ return llvm::createStringError("not referencing a DW_TAG_APPLE_property");
+
+ auto PropNameForm = PropDIE.find(DW_AT_APPLE_property_name);
+ if (!PropNameForm)
+ return "";
+
+ auto NameOrErr = PropNameForm->getAsCString();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+
+ return *NameOrErr;
+}
+
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFAttribute &AttrValue, unsigned Indent,
DIDumpOptions DumpOpts) {
@@ -233,6 +252,15 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
Die.getAttributeValueAsReferencedDie(FormValue).getName(
DINameKind::LinkageName))
OS << Space << "\"" << Name << '\"';
+ } else if (Attr == DW_AT_APPLE_property) {
+ auto PropDIE = Die.getAttributeValueAsReferencedDie(FormValue);
+ if (auto PropNameOrErr = getApplePropertyName(PropDIE))
+ OS << Space << "\"" << *PropNameOrErr << '\"';
+ else
+ DumpOpts.RecoverableErrorHandler(createStringError(
+ errc::invalid_argument,
+ llvm::formatv("decoding DW_AT_APPLE_property_name: {}",
+ toString(PropNameOrErr.takeError()))));
} else if (Attr == DW_AT_type || Attr == DW_AT_containing_type) {
DWARFDie D = resolveReferencedType(Die, FormValue);
if (D && !D.isNULL()) {
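
The dump path above follows the usual llvm::Expected contract: on failure the contained Error must be consumed exactly once, here via takeError() into the recoverable-error handler. A minimal sketch of the pattern (getName is a hypothetical helper; only LLVM's Support APIs are assumed):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static Expected<StringRef> getName(bool Ok) {
  if (!Ok)
    return createStringError(inconvertibleErrorCode(), "no name");
  return StringRef("myProp");
}

int main() {
  if (auto NameOrErr = getName(false))
    outs() << *NameOrErr << "\n";
  else // the Error must be consumed, or Expected asserts on destruction
    errs() << toString(NameOrErr.takeError()) << "\n";
}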
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 6da97f9..354c51d 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -831,17 +831,17 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
};
uint8_t Version = 0;
- uint8_t Feature = 0;
+ uint16_t Feature = 0;
BBAddrMap::Features FeatEnable{};
while (!ULEBSizeErr && !MetadataDecodeErr && Cur &&
Cur.tell() < Content.size()) {
Version = Data.getU8(Cur);
if (!Cur)
break;
- if (Version < 2 || Version > 4)
+ if (Version < 2 || Version > 5)
return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " +
Twine(static_cast<int>(Version)));
- Feature = Data.getU8(Cur); // Feature byte
+ Feature = Version < 5 ? Data.getU8(Cur) : Data.getU16(Cur);
if (!Cur)
break;
auto FeatEnableOrErr = BBAddrMap::Features::decode(Feature);
@@ -858,6 +858,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
"basic block hash feature is enabled: version = " +
Twine(static_cast<int>(Version)) +
" feature = " + Twine(static_cast<int>(Feature)));
+ if (FeatEnable.PostLinkCfg && Version < 5)
+ return createError("version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when "
+ "post link cfg feature is enabled: version = " +
+ Twine(static_cast<int>(Version)) +
+ " feature = " + Twine(static_cast<int>(Feature)));
uint32_t NumBlocksInBBRange = 0;
uint32_t NumBBRanges = 1;
typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0;
@@ -946,6 +951,10 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
uint64_t BBF = FeatEnable.BBFreq
? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)
: 0;
+ uint32_t PostLinkBBFreq =
+ FeatEnable.PostLinkCfg
+ ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr)
+ : 0;
// Branch probability
llvm::SmallVector<PGOAnalysisMap::PGOBBEntry::SuccessorEntry, 2>
@@ -955,13 +964,20 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
for (uint64_t I = 0; I < SuccCount; ++I) {
uint32_t BBID = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
uint32_t BrProb = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);
+ uint32_t PostLinkFreq =
+ FeatEnable.PostLinkCfg
+ ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr)
+ : 0;
+
if (PGOAnalyses)
- Successors.push_back({BBID, BranchProbability::getRaw(BrProb)});
+ Successors.push_back(
+ {BBID, BranchProbability::getRaw(BrProb), PostLinkFreq});
}
}
if (PGOAnalyses)
- PGOBBEntries.push_back({BlockFrequency(BBF), std::move(Successors)});
+ PGOBBEntries.push_back(
+ {BlockFrequency(BBF), PostLinkBBFreq, std::move(Successors)});
}
if (PGOAnalyses)
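
Decoding note: the feature field's width now depends on the version byte read just before it (one byte below version 5, two bytes from version 5 on). A plain-C++ sketch of that dependent read, assuming little-endian data for brevity (the real reader follows the ELF file's endianness):

#include <cstddef>
#include <cstdint>

uint16_t readFeatureField(const uint8_t *Buf, size_t &Off, uint8_t Version) {
  if (Version < 5)
    return Buf[Off++]; // legacy one-byte feature field
  uint16_t F = uint16_t(Buf[Off]) | uint16_t(Buf[Off + 1]) << 8;
  Off += 2;            // version 5: 16-bit feature field
  return F;
}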
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index 8b75fbe..8530785 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1465,13 +1465,19 @@ void ELFState<ELFT>::writeSectionContent(
for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) {
// Write version and feature values.
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) {
- if (E.Version > 4)
+ if (E.Version > 5)
WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: "
<< static_cast<int>(E.Version)
<< "; encoding using the most recent version";
CBA.write(E.Version);
- CBA.write(E.Feature);
- SHeader.sh_size += 2;
+ SHeader.sh_size += 1;
+ if (E.Version < 5) {
+ CBA.write(static_cast<uint8_t>(E.Feature));
+ SHeader.sh_size += 1;
+ } else {
+ CBA.write<uint16_t>(E.Feature, ELFT::Endianness);
+ SHeader.sh_size += 2;
+ }
}
auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature);
bool MultiBBRangeFeatureEnabled = false;
@@ -1556,11 +1562,15 @@ void ELFState<ELFT>::writeSectionContent(
for (const auto &PGOBBE : PGOBBEntries) {
if (PGOBBE.BBFreq)
SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq);
+ if (FeatureOrErr->PostLinkCfg || PGOBBE.PostLinkBBFreq.has_value())
+ SHeader.sh_size += CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0));
if (PGOBBE.Successors) {
SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size());
- for (const auto &[ID, BrProb] : *PGOBBE.Successors) {
+ for (const auto &[ID, BrProb, PostLinkBrFreq] : *PGOBBE.Successors) {
SHeader.sh_size += CBA.writeULEB128(ID);
SHeader.sh_size += CBA.writeULEB128(BrProb);
+ if (FeatureOrErr->PostLinkCfg || PostLinkBrFreq.has_value())
+ SHeader.sh_size += CBA.writeULEB128(PostLinkBrFreq.value_or(0));
}
}
}
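
All the new PostLink* fields are ULEB128-encoded, and the emitter adds the returned byte count to sh_size for each one. For reference, an encoder producing the same byte stream as CBA.writeULEB128:

#include <cstddef>
#include <cstdint>
#include <vector>

// Encode V as ULEB128; returns the number of bytes written.
size_t writeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  size_t N = 0;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
    ++N;
  } while (V);
  return N;
}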
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index f8a84b0..e5e5fc2 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -1886,7 +1886,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry>::mapping(
IO &IO, ELFYAML::BBAddrMapEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
IO.mapRequired("Version", E.Version);
- IO.mapOptional("Feature", E.Feature, Hex8(0));
+ IO.mapOptional("Feature", E.Feature, Hex16(0));
IO.mapOptional("NumBBRanges", E.NumBBRanges);
IO.mapOptional("BBRanges", E.BBRanges);
}
@@ -1920,6 +1920,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry>::mapping(
IO &IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &E) {
assert(IO.getContext() && "The IO context is not initialized");
IO.mapOptional("BBFreq", E.BBFreq);
+ IO.mapOptional("PostLinkBBFreq", E.PostLinkBBFreq);
IO.mapOptional("Successors", E.Successors);
}
@@ -1929,6 +1930,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry>::
assert(IO.getContext() && "The IO context is not initialized");
IO.mapRequired("ID", E.ID);
IO.mapRequired("BrProb", E.BrProb);
+ IO.mapOptional("PostLinkBrFreq", E.PostLinkBrFreq);
}
void MappingTraits<ELFYAML::GnuHashHeader>::mapping(IO &IO,
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
index 3b810d0..79863e1 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp
@@ -34,7 +34,7 @@ class HexagonCopyHoisting : public MachineFunctionPass {
public:
static char ID;
- HexagonCopyHoisting() : MachineFunctionPass(ID), MFN(nullptr), MRI(nullptr) {}
+ HexagonCopyHoisting() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Hexagon Copy Hoisting"; }
@@ -56,8 +56,8 @@ public:
void moveCopyInstr(MachineBasicBlock *DestBB,
std::pair<Register, Register> Key, MachineInstr *MI);
- MachineFunction *MFN;
- MachineRegisterInfo *MRI;
+ MachineFunction *MFN = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
std::vector<DenseMap<std::pair<Register, Register>, MachineInstr *>>
CopyMIList;
};
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
index 93418f7..a10c937 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
@@ -34,13 +34,13 @@ STATISTIC(HexagonNumStoreAbsConversions,
namespace {
class HexagonGenMemAbsolute : public MachineFunctionPass {
- const HexagonInstrInfo *TII;
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
+ const HexagonInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
public:
static char ID;
- HexagonGenMemAbsolute() : MachineFunctionPass(ID), TII(0), MRI(0), TRI(0) {}
+ HexagonGenMemAbsolute() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "Hexagon Generate Load/Store Set Absolute Address Instruction";
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index 1637b91..d19920c 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -612,6 +612,9 @@ let Predicates = [UseHVX] in {
(V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>;
def: Pat<(VecQ32 (trunc HVI32:$Vs)),
(V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>;
+ def: Pat<(VecQ16 (trunc HWI32:$Vss)),
+          (Combineq (VecQ32 (V6_vandvrt (HiVec $Vss), (ToI32 0x01010101))),
+                    (VecQ32 (V6_vandvrt (LoVec $Vss), (ToI32 0x01010101))))>;
}
let Predicates = [UseHVX] in {
diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
index 71bdfc66..5a85f34 100644
--- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp
@@ -43,7 +43,7 @@ namespace {
class HexagonTfrCleanup : public MachineFunctionPass {
public:
static char ID;
- HexagonTfrCleanup() : MachineFunctionPass(ID), HII(0), TRI(0) {}
+ HexagonTfrCleanup() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Hexagon TFR Cleanup"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -52,8 +52,8 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- const HexagonInstrInfo *HII;
- const TargetRegisterInfo *TRI;
+ const HexagonInstrInfo *HII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
typedef DenseMap<unsigned, uint64_t> ImmediateMap;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73..e86b21c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -365,6 +365,7 @@ def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))),
// FP Rounding
let Predicates = [HasBasicF, IsLA64] in {
def : PatFpr<frint, FRINT_S, FPR32>;
+def : PatFpr<flog2, FLOGB_S, FPR32>;
} // Predicates = [HasBasicF, IsLA64]
let Predicates = [HasBasicF, IsLA32] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa..2e88254 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -348,6 +348,7 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>;
// FP Rounding
let Predicates = [HasBasicD, IsLA64] in {
def : PatFpr<frint, FRINT_D, FPR64>;
+def : PatFpr<flog2, FLOGB_D, FPR64>;
} // Predicates = [HasBasicD, IsLA64]
/// Pseudo-instructions needed for the soft-float ABI with LA32D
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6..a6de839 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -244,8 +244,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
Subtarget.isSoftFPABI() ? LibCall : Custom);
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
+ }
if (!Subtarget.hasBasicD()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
@@ -291,8 +293,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
Subtarget.isSoftFPABI() ? LibCall : Custom);
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f64, Legal);
+ }
}
// Set operations for 'LSX' feature.
@@ -362,6 +366,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Legal);
+ setOperationAction(ISD::FLOG2, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
@@ -443,6 +448,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Legal);
+ setOperationAction(ISD::FLOG2, VT, Legal);
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 613dea6..ca4ee5f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1593,6 +1593,9 @@ def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),
// XVFSQRT_{S/D}
defm : PatXrF<fsqrt, "XVFSQRT">;
+// XVFLOGB_{S/D}
+defm : PatXrF<flog2, "XVFLOGB">;
+
// XVRECIP_{S/D}
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
(XVFRECIP_S v8f32:$xj)>;
@@ -2024,6 +2027,24 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
(XVFTINTRZ_LU_D v4f64:$vj)),
sub_128)>;
+// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU}
+defm : VAvgPat<sra, "XVAVG_B", v32i8>;
+defm : VAvgPat<sra, "XVAVG_H", v16i16>;
+defm : VAvgPat<sra, "XVAVG_W", v8i32>;
+defm : VAvgPat<sra, "XVAVG_D", v4i64>;
+defm : VAvgPat<srl, "XVAVG_BU", v32i8>;
+defm : VAvgPat<srl, "XVAVG_HU", v16i16>;
+defm : VAvgPat<srl, "XVAVG_WU", v8i32>;
+defm : VAvgPat<srl, "XVAVG_DU", v4i64>;
+defm : VAvgrPat<sra, "XVAVGR_B", v32i8>;
+defm : VAvgrPat<sra, "XVAVGR_H", v16i16>;
+defm : VAvgrPat<sra, "XVAVGR_W", v8i32>;
+defm : VAvgrPat<sra, "XVAVGR_D", v4i64>;
+defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>;
+defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>;
+defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>;
+defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>;
+
// abs
def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;
def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 4619c6b..92402ba 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1518,6 +1518,18 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
}
}
+multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
+multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> {
+ def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)),
+ (vt (vsplat_imm_eq_1)))),
+ (vt (vsplat_imm_eq_1))),
+ (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>;
+}
+
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -1783,6 +1795,9 @@ def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va),
// VFSQRT_{S/D}
defm : PatVrF<fsqrt, "VFSQRT">;
+// VFLOGB_{S/D}
+defm : PatVrF<flog2, "VFLOGB">;
+
// VFRECIP_{S/D}
def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),
(VFRECIP_S v4f32:$vj)>;
@@ -2154,6 +2169,24 @@ def : Pat<(f32 f32imm_vldi:$in),
def : Pat<(f64 f64imm_vldi:$in),
(f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>;
+// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU}
+defm : VAvgPat<sra, "VAVG_B", v16i8>;
+defm : VAvgPat<sra, "VAVG_H", v8i16>;
+defm : VAvgPat<sra, "VAVG_W", v4i32>;
+defm : VAvgPat<sra, "VAVG_D", v2i64>;
+defm : VAvgPat<srl, "VAVG_BU", v16i8>;
+defm : VAvgPat<srl, "VAVG_HU", v8i16>;
+defm : VAvgPat<srl, "VAVG_WU", v4i32>;
+defm : VAvgPat<srl, "VAVG_DU", v2i64>;
+defm : VAvgrPat<sra, "VAVGR_B", v16i8>;
+defm : VAvgrPat<sra, "VAVGR_H", v8i16>;
+defm : VAvgrPat<sra, "VAVGR_W", v4i32>;
+defm : VAvgrPat<sra, "VAVGR_D", v2i64>;
+defm : VAvgrPat<srl, "VAVGR_BU", v16i8>;
+defm : VAvgrPat<srl, "VAVGR_HU", v8i16>;
+defm : VAvgrPat<srl, "VAVGR_WU", v4i32>;
+defm : VAvgrPat<srl, "VAVGR_DU", v2i64>;
+
// abs
def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;
def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>;
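
Semantics behind VAvgPat/VAvgrPat: they match an add followed by a shift right by one (sra for the signed variants, srl for the unsigned ones), i.e. a truncating (VAVG) or rounding (VAVGR) average. A per-lane scalar model of the signed case:

#include <cassert>
#include <cstdint>

int32_t avg(int32_t A, int32_t B) { return (A + B) >> 1; }      // VAVG
int32_t avgr(int32_t A, int32_t B) { return (A + B + 1) >> 1; } // VAVGR

int main() {
  assert(avg(5, 2) == 3);  // truncates
  assert(avgr(5, 2) == 4); // rounds the halfway case up
}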
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 000d296..4ff489d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -296,8 +296,9 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM,
CodeGenOptLevel OL, bool JIT)
- : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU,
- computeFSAdditions(FS, OL, TT), Options,
+ : CodeGenTargetMachineImpl(T,
+ TT.computeDataLayout(Options.MCOptions.ABIName),
+ TT, CPU, computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM),
getEffectivePPCCodeModel(TT, CM, JIT), OL),
TLOF(createTLOF(getTargetTriple())),
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 8198173..282cf5d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -92,6 +92,10 @@ private:
void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,
MachineIRBuilder &MIB) const;
bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ void addVectorLoadStoreOperands(MachineInstr &I,
+ SmallVectorImpl<SrcOp> &SrcOps,
+ unsigned &CurOp, bool IsMasked,
+ bool IsStrided) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
@@ -716,6 +720,26 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {
return GenericOpc;
}
+void RISCVInstructionSelector::addVectorLoadStoreOperands(
+ MachineInstr &I, SmallVectorImpl<SrcOp> &SrcOps, unsigned &CurOp,
+ bool IsMasked, bool IsStrided) const {
+ // Base Pointer
+ auto PtrReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PtrReg);
+
+ // Stride
+ if (IsStrided) {
+ auto StrideReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(StrideReg);
+ }
+
+ // Mask
+ if (IsMasked) {
+ auto MaskReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(MaskReg);
+ }
+}
+
bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineIRBuilder &MIB) const {
// Find the intrinsic ID.
@@ -752,21 +776,7 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
SrcOps.push_back(Register(RISCV::NoRegister));
}
- // Base Pointer
- auto PtrReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(PtrReg);
-
- // Stride
- if (IsStrided) {
- auto StrideReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(StrideReg);
- }
-
- // Mask
- if (IsMasked) {
- auto MaskReg = I.getOperand(CurOp++).getReg();
- SrcOps.push_back(MaskReg);
- }
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided);
RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
const RISCV::VLEPseudo *P =
@@ -795,6 +805,48 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(
I.eraseFromParent();
return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
}
+ case Intrinsic::riscv_vsm:
+ case Intrinsic::riscv_vse:
+ case Intrinsic::riscv_vse_mask:
+ case Intrinsic::riscv_vsse:
+ case Intrinsic::riscv_vsse_mask: {
+ bool IsMasked = IntrinID == Intrinsic::riscv_vse_mask ||
+ IntrinID == Intrinsic::riscv_vsse_mask;
+ bool IsStrided = IntrinID == Intrinsic::riscv_vsse ||
+ IntrinID == Intrinsic::riscv_vsse_mask;
+ LLT VT = MRI->getType(I.getOperand(1).getReg());
+ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
+
+ // Sources
+ unsigned CurOp = 1;
+ SmallVector<SrcOp, 4> SrcOps; // Source registers.
+
+ // Store value
+ auto PassthruReg = I.getOperand(CurOp++).getReg();
+ SrcOps.push_back(PassthruReg);
+
+ addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided);
+
+ RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));
+ const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
+ IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
+
+ auto PseudoMI = MIB.buildInstr(P->Pseudo, {}, SrcOps);
+
+ // Select VL
+ auto VLOpFn = renderVLOp(I.getOperand(CurOp++));
+ for (auto &RenderFn : *VLOpFn)
+ RenderFn(PseudoMI);
+
+ // SEW
+ PseudoMI.addImm(Log2SEW);
+
+ // Memref
+ PseudoMI.cloneMemRefs(I);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);
+ }
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 4105618..526675a 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -127,6 +127,10 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoCCAND:
case RISCV::PseudoCCOR:
case RISCV::PseudoCCXOR:
+ case RISCV::PseudoCCMAX:
+ case RISCV::PseudoCCMAXU:
+ case RISCV::PseudoCCMIN:
+ case RISCV::PseudoCCMINU:
case RISCV::PseudoCCADDW:
case RISCV::PseudoCCSUBW:
case RISCV::PseudoCCSLL:
@@ -217,6 +221,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
.addImm(0);
} else {
unsigned NewOpc;
+ // clang-format off
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
@@ -228,6 +233,10 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break;
case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break;
case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break;
+ case RISCV::PseudoCCMAX: NewOpc = RISCV::MAX; break;
+ case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break;
+ case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break;
+ case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;
@@ -250,6 +259,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
case RISCV::PseudoCCNDS_BFOS: NewOpc = RISCV::NDS_BFOS; break;
case RISCV::PseudoCCNDS_BFOZ: NewOpc = RISCV::NDS_BFOZ; break;
}
+ // clang-format on
if (NewOpc == RISCV::NDS_BFOZ || NewOpc == RISCV::NDS_BFOS) {
BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index b4556f6..cfee6ab 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1851,6 +1851,11 @@ def TuneShortForwardBranchOpt
def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;
def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">;
+def TuneShortForwardBranchIMinMax
+ : SubtargetFeature<"short-forward-branch-i-minmax", "HasShortForwardBranchIMinMax",
+ "true", "Enable short forward branch optimization for min,max instructions in Zbb",
+ [TuneShortForwardBranchOpt]>;
+
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
def TuneNoSinkSplatOperands
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 912b82d..3a7013d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1699,6 +1699,10 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
case RISCV::AND: return RISCV::PseudoCCAND;
case RISCV::OR: return RISCV::PseudoCCOR;
case RISCV::XOR: return RISCV::PseudoCCXOR;
+ case RISCV::MAX: return RISCV::PseudoCCMAX;
+ case RISCV::MAXU: return RISCV::PseudoCCMAXU;
+ case RISCV::MIN: return RISCV::PseudoCCMIN;
+ case RISCV::MINU: return RISCV::PseudoCCMINU;
case RISCV::ADDI: return RISCV::PseudoCCADDI;
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
@@ -1735,7 +1739,8 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
const MachineRegisterInfo &MRI,
- const TargetInstrInfo *TII) {
+ const TargetInstrInfo *TII,
+ const RISCVSubtarget &STI) {
if (!Reg.isVirtual())
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@@ -1743,6 +1748,12 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
+
+ if (!STI.hasShortForwardBranchIMinMax() &&
+ (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN ||
+ MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
+ return nullptr;
+
// Check if MI can be predicated and folded into the CCMOV.
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
@@ -1806,10 +1817,10 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI =
- canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
+ canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI);
bool Invert = !DefMI;
if (!DefMI)
- DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
+ DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI);
if (!DefMI)
return nullptr;
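
The added gate means MIN/MAX/MINU/MAXU can only fold into the new PseudoCCM* forms when the tune feature is enabled; all other foldable opcodes are unaffected. A condensed model of the check (illustrative opcode enum, not RISCV's generated one):

enum Opcode { ADD, MAX, MAXU, MIN, MINU }; // illustrative subset

// False exactly when folding must be rejected: a Zbb min/max candidate on a
// subtarget without short-forward-branch-i-minmax.
bool mayFoldAsPredicatedOp(Opcode Opc, bool HasSFBIMinMax) {
  bool IsMinMax = Opc == MAX || Opc == MAXU || Opc == MIN || Opc == MINU;
  return !IsMinMax || HasSFBIMinMax;
}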
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 0114fbd..5a67a5a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -106,6 +106,10 @@ def PseudoCCSRA : SFBALU_rr;
def PseudoCCAND : SFBALU_rr;
def PseudoCCOR : SFBALU_rr;
def PseudoCCXOR : SFBALU_rr;
+def PseudoCCMAX : SFBALU_rr;
+def PseudoCCMIN : SFBALU_rr;
+def PseudoCCMAXU : SFBALU_rr;
+def PseudoCCMINU : SFBALU_rr;
def PseudoCCADDI : SFBALU_ri;
def PseudoCCANDI : SFBALU_ri;
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index d765d9c..d735923 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -208,7 +208,7 @@ static std::string computeMipsDataLayout(const Triple &TT, StringRef ABIName) {
return Ret;
}
-static std::string computePowerDataLayout(const Triple &T) {
+static std::string computePowerDataLayout(const Triple &T, StringRef ABIName) {
bool is64Bit = T.isPPC64();
std::string Ret;
@@ -228,7 +228,8 @@ static std::string computePowerDataLayout(const Triple &T) {
// If the target ABI uses function descriptors, then the alignment of function
// pointers depends on the alignment used to emit the descriptor. Otherwise,
// function pointers are aligned to 32 bits because the instructions must be.
- if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) {
+ if ((T.getArch() == Triple::ppc64 &&
+ (!T.isPPC64ELFv2ABI() && ABIName != "elfv2"))) {
Ret += "-Fi64";
} else if (T.isOSAIX()) {
Ret += is64Bit ? "-Fi64" : "-Fi32";
@@ -573,7 +574,7 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {
case Triple::ppcle:
case Triple::ppc64:
case Triple::ppc64le:
- return computePowerDataLayout(*this);
+ return computePowerDataLayout(*this, ABIName);
case Triple::r600:
case Triple::amdgcn:
return computeAMDDataLayout(*this);
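
Effect of threading ABIName through: big-endian ppc64 with the ELFv1 ABI uses function descriptors, so the layout carries "-Fi64"; an explicit -target-abi elfv2 must now suppress that even when the triple alone does not imply ELFv2. The new condition, condensed:

#include <string>

bool usesFunctionDescriptors(bool IsPPC64, bool TripleIsELFv2,
                             const std::string &ABIName) {
  // "-Fi64" is emitted only when neither the triple nor the ABI name
  // selects ELFv2.
  return IsPPC64 && !TripleIsELFv2 && ABIName != "elfv2";
}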
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b03fb62..7f6d779 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5977,14 +5977,14 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
}
// Prune obsolete incoming values off the successors' PHI nodes.
- for (auto BBI = Dest->begin(); isa<PHINode>(BBI); ++BBI) {
+ for (auto &PHI : make_early_inc_range(Dest->phis())) {
unsigned PreviousEdges = Cases->size();
if (Dest == SI->getDefaultDest())
++PreviousEdges;
for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ PHI.removeIncomingValue(SI->getParent());
}
- for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
+ for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
unsigned PreviousEdges = OtherCases->size();
if (OtherDest == SI->getDefaultDest())
++PreviousEdges;
@@ -5993,7 +5993,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
if (NewBI->isUnconditional())
++E;
for (unsigned I = 0; I != E; ++I)
- cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
+ PHI.removeIncomingValue(SI->getParent());
}
// Clean up the default block - it may have phis or other instructions before
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 505fb43..25bf49d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3908,7 +3908,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
continue;
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
precomputeCosts(*Plan, VF, CostCtx);
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -4166,7 +4166,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
// Add on other costs that are modelled in VPlan, but not in the legacy
// cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
VPRegionBlock *VectorRegion = P->getVectorLoopRegion();
assert(VectorRegion && "Expected to have a vector region!");
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -6876,7 +6876,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
ElementCount VF) const {
- VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE());
+ VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(),
+ OrigLoop);
InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);
// Now compute and add the VPlan-based cost.
@@ -7110,12 +7111,13 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
// case, don't trigger the assertion, as the extra simplifications may cause a
// different VF to be picked by the VPlan-based cost model.
VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
// Verify that the VPlan-based and legacy cost models agree, except for VPlans
// with early exits and plans with additional VPlan simplifications. The
// legacy cost model doesn't properly model costs for such loops.
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() ||
+ !Legal->getLAI()->getSymbolicStrides().empty() ||
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
CostCtx, OrigLoop,
BestFactor.Width) ||
@@ -8441,7 +8443,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// and mulacc-reduction are implemented.
if (!CM.foldTailWithEVL()) {
VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind,
- *CM.PSE.getSE());
+ *CM.PSE.getSE(), OrigLoop);
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,
CostCtx, Range);
}
@@ -9911,7 +9913,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM,
- CM.CostKind, *CM.PSE.getSE());
+ CM.CostKind, *CM.PSE.getSE(), L);
if (!ForceVectorization &&
!isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,
LVP.getPlanFor(VF.Width), SEL,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index 2aaabd9..965426f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -350,13 +350,14 @@ struct VPCostContext {
SmallPtrSet<Instruction *, 8> SkipCostComputation;
TargetTransformInfo::TargetCostKind CostKind;
ScalarEvolution &SE;
+ const Loop *L;
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
const VPlan &Plan, LoopVectorizationCostModel &CM,
TargetTransformInfo::TargetCostKind CostKind,
- ScalarEvolution &SE)
+ ScalarEvolution &SE, const Loop *L)
: TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM),
- CostKind(CostKind), SE(SE) {}
+ CostKind(CostKind), SE(SE), L(L) {}
/// Return the cost for \p UI with \p VF using the legacy cost model as
/// fallback until computing the cost of all recipes migrates to VPlan.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a63c80..bde62dd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3167,26 +3167,30 @@ bool VPReplicateRecipe::shouldPack() const {
});
}
-/// Returns true if \p Ptr is a pointer computation for which the legacy cost
-/// model computes a SCEV expression when computing the address cost.
-static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) {
+/// Returns a SCEV expression for \p Ptr if it is a pointer computation for
+/// which the legacy cost model computes a SCEV expression when computing the
+/// address cost, and nullptr otherwise. Computing SCEVs for VPValues is
+/// incomplete and may return SCEVCouldNotCompute even in cases where the
+/// legacy cost model can compute a SCEV; callers use that as a signal to
+/// fall back to the legacy cost model.
+static const SCEV *getAddressAccessSCEV(const VPValue *Ptr, ScalarEvolution &SE,
+ const Loop *L) {
auto *PtrR = Ptr->getDefiningRecipe();
if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&
cast<VPReplicateRecipe>(PtrR)->getOpcode() ==
Instruction::GetElementPtr) ||
isa<VPWidenGEPRecipe>(PtrR) ||
match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue()))))
- return false;
+ return nullptr;
// We are looking for a GEP where all indices are either loop invariant or
// inductions.
for (VPValue *Opd : drop_begin(PtrR->operands())) {
if (!Opd->isDefinedOutsideLoopRegions() &&
!isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd))
- return false;
+ return nullptr;
}
- return true;
+ return vputils::getSCEVExprForVPValue(Ptr, SE, L);
}
/// Returns true if \p V is used as part of the address of another load or
@@ -3354,9 +3358,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
bool IsLoad = UI->getOpcode() == Instruction::Load;
const VPValue *PtrOp = getOperand(!IsLoad);
- // TODO: Handle cases where we need to pass a SCEV to
- // getAddressComputationCost.
- if (shouldUseAddressAccessSCEV(PtrOp))
+ const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.SE, Ctx.L);
+ if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV))
break;
Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0));
@@ -3374,7 +3377,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
InstructionCost ScalarCost =
ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(
PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE,
- nullptr, Ctx.CostKind);
+ PtrSCEV, Ctx.CostKind);
if (isSingleScalar())
return ScalarCost;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 4d98014..986c801 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1419,6 +1419,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
true /*IsSingleScalar*/);
Clone->insertBefore(RepOrWidenR);
RepOrWidenR->replaceAllUsesWith(Clone);
+ if (isDeadRecipe(*RepOrWidenR))
+ RepOrWidenR->eraseFromParent();
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index 4db92e7..8c23e78 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -75,7 +75,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
B == Plan.getBackedgeTakenCount();
}
-const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
+const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V,
+ ScalarEvolution &SE, const Loop *L) {
if (V->isLiveIn()) {
if (Value *LiveIn = V->getLiveInIRValue())
return SE.getSCEV(LiveIn);
@@ -86,6 +87,53 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {
return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())
.Case<VPExpandSCEVRecipe>(
[](const VPExpandSCEVRecipe *R) { return R->getSCEV(); })
+ .Case<VPCanonicalIVPHIRecipe>([&SE, L](const VPCanonicalIVPHIRecipe *R) {
+ if (!L)
+ return SE.getCouldNotCompute();
+ const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L,
+ SCEV::FlagAnyWrap);
+ })
+ .Case<VPDerivedIVRecipe>([&SE, L](const VPDerivedIVRecipe *R) {
+ const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L);
+ const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L);
+ if (any_of(ArrayRef({Start, IV, Scale}), IsaPred<SCEVCouldNotCompute>))
+ return SE.getCouldNotCompute();
+
+ return SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()),
+ SE.getMulExpr(IV, SE.getTruncateOrSignExtend(
+ Scale, IV->getType())));
+ })
+ .Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) {
+ const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L);
+ if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step) ||
+ !Step->isOne())
+ return SE.getCouldNotCompute();
+ return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()),
+ Step);
+ })
+ .Case<VPReplicateRecipe>([&SE, L](const VPReplicateRecipe *R) {
+ if (R->getOpcode() != Instruction::GetElementPtr)
+ return SE.getCouldNotCompute();
+
+ const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L);
+ if (isa<SCEVCouldNotCompute>(Base))
+ return SE.getCouldNotCompute();
+
+ SmallVector<const SCEV *> IndexExprs;
+ for (VPValue *Index : drop_begin(R->operands())) {
+ const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L);
+ if (isa<SCEVCouldNotCompute>(IndexExpr))
+ return SE.getCouldNotCompute();
+ IndexExprs.push_back(IndexExpr);
+ }
+
+ Type *SrcElementTy = cast<GetElementPtrInst>(R->getUnderlyingInstr())
+ ->getSourceElementType();
+ return SE.getGEPExpr(Base, IndexExprs, SrcElementTy);
+ })
.Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });
}
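
Worked form of the VPDerivedIVRecipe case: the SCEV built is Start + IV * Scale, after converting Start and Scale to the IV's type. As a scalar model:

#include <cassert>
#include <cstdint>

int64_t derivedIV(int64_t Start, int64_t IV, int64_t Scale) {
  return Start + IV * Scale;
}

int main() {
  // A canonical IV {0,+,1} with start 16 and scale 4 yields 16, 20, 24, ...
  assert(derivedIV(16, 0, 4) == 16);
  assert(derivedIV(16, 2, 4) == 24);
}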
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 37cd413..c21a0e7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -37,7 +37,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr);
/// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no
/// SCEV expression could be constructed.
-const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
+const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE,
+ const Loop *L = nullptr);
/// Returns true if \p VPV is a single scalar, either because it produces the
/// same value for all lanes or only has its first lane used.