Diffstat (limited to 'llvm/lib'): 31 files changed, 372 insertions, 72 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8aa488f..f65d88a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1443,7 +1443,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
           MF.hasBBSections() && NumMBBSectionRanges > 1,
           // Use static_cast to avoid breakage of tests on windows.
           static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls,
-          static_cast<bool>(EmitBBHash)};
+          static_cast<bool>(EmitBBHash), false};
 }

 void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index fbcd614..485b44ae 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -287,6 +287,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       }
       continue;
     }
+    case 'h': { // Basic block hash specifier.
+      // Skip the profile when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramPathAndClusterInfo.end())
+        continue;
+      for (auto BBIDHashStr : Values) {
+        auto [BBIDStr, HashStr] = BBIDHashStr.split(':');
+        unsigned long long BBID = 0, Hash = 0;
+        if (getAsUnsignedInteger(BBIDStr, 10, BBID))
+          return createProfileParseError(Twine("unsigned integer expected: '") +
+                                         BBIDStr + "'");
+        if (getAsUnsignedInteger(HashStr, 16, Hash))
+          return createProfileParseError(
+              Twine("unsigned integer expected in hex format: '") + HashStr +
+              "'");
+        FI->second.BBHashes[BBID] = Hash;
+      }
+      continue;
+    }
     default:
       return createProfileParseError(Twine("invalid specifier: '") +
                                      Twine(Specifier) + "'");
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index ca82857..5fab6ec 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1893,6 +1893,8 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
   case TargetOpcode::G_UADDSAT:
   case TargetOpcode::G_SSUBSAT:
   case TargetOpcode::G_USUBSAT:
+  case TargetOpcode::G_SBFX:
+  case TargetOpcode::G_UBFX:
     return false;
   case TargetOpcode::G_SSHLSAT:
   case TargetOpcode::G_USHLSAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1ef5dc2..893556b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2715,6 +2715,12 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
           (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
       SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags);
       AddToWorklist(Add.getNode());
+      // We can't set InBounds even if both original ptradds were InBounds and
+      // NUW: SDAG usually represents pointers as integers, therefore, the
+      // matched pattern behaves as if it had implicit casts:
+      //   (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds x, y))), z)
+      // The outer inbounds ptradd might therefore rely on a provenance that x
+      // does not have.
       return DAG.getMemBasePlusOffset(X, Add, DL, Flags);
     }
   }
@@ -2740,6 +2746,12 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
         // that.
         SDNodeFlags Flags =
             (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+        // We can't set InBounds even if both original ptradds were InBounds and
+        // NUW: SDAG usually represents pointers as integers, therefore, the
+        // matched pattern behaves as if it had implicit casts:
+        //   (ptradd inbounds (inttoptr (ptrtoint (ptradd inbounds GA, v))), c)
+        // The outer inbounds ptradd might therefore rely on a provenance that
+        // GA does not have.
         SDValue Inner = DAG.getMemBasePlusOffset(GAValue, N1, DL, Flags);
         AddToWorklist(Inner.getNode());
         return DAG.getMemBasePlusOffset(Inner, N0.getOperand(1), DL, Flags);
@@ -2763,8 +2775,13 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) {
     bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z);

     // If both additions in the original were NUW, reassociation preserves that.
-    SDNodeFlags ReassocFlags =
-        (N->getFlags() & N1->getFlags()) & SDNodeFlags::NoUnsignedWrap;
+    SDNodeFlags CommonFlags = N->getFlags() & N1->getFlags();
+    SDNodeFlags ReassocFlags = CommonFlags & SDNodeFlags::NoUnsignedWrap;
+    if (CommonFlags.hasNoUnsignedWrap()) {
+      // If both operations are NUW and the PTRADD is inbounds, the offsets are
+      // both non-negative, so the reassociated PTRADDs are also inbounds.
+      ReassocFlags |= N->getFlags() & SDNodeFlags::InBounds;
+    }

     if (ZIsConstant != YIsConstant) {
       if (YIsConstant)
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index db5cc37..6c78ef0 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -129,6 +129,25 @@ prettyLanguageVersionString(const DWARFAttribute &AttrValue,
       static_cast<SourceLanguageName>(*LName), *LVersion);
 }

+static llvm::Expected<llvm::StringRef>
+getApplePropertyName(const DWARFDie &PropDIE) {
+  if (!PropDIE)
+    return llvm::createStringError("invalid DIE");
+
+  if (PropDIE.getTag() != DW_TAG_APPLE_property)
+    return llvm::createStringError("not referencing a DW_TAG_APPLE_property");
+
+  auto PropNameForm = PropDIE.find(DW_AT_APPLE_property_name);
+  if (!PropNameForm)
+    return "";
+
+  auto NameOrErr = PropNameForm->getAsCString();
+  if (!NameOrErr)
+    return NameOrErr.takeError();
+
+  return *NameOrErr;
+}
+
 static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
                           const DWARFAttribute &AttrValue, unsigned Indent,
                           DIDumpOptions DumpOpts) {
@@ -233,6 +252,15 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
             Die.getAttributeValueAsReferencedDie(FormValue).getName(
                 DINameKind::LinkageName))
       OS << Space << "\"" << Name << '\"';
+  } else if (Attr == DW_AT_APPLE_property) {
+    auto PropDIE = Die.getAttributeValueAsReferencedDie(FormValue);
+    if (auto PropNameOrErr = getApplePropertyName(PropDIE))
+      OS << Space << "\"" << *PropNameOrErr << '\"';
+    else
+      DumpOpts.RecoverableErrorHandler(createStringError(
+          errc::invalid_argument,
+          llvm::formatv("decoding DW_AT_APPLE_property_name: {}",
+                        toString(PropNameOrErr.takeError()))));
   } else if (Attr == DW_AT_type || Attr == DW_AT_containing_type) {
     DWARFDie D =
resolveReferencedType(Die, FormValue);      if (D && !D.isNULL()) { diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 6da97f9..354c51d 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -831,17 +831,17 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,    };    uint8_t Version = 0; -  uint8_t Feature = 0; +  uint16_t Feature = 0;    BBAddrMap::Features FeatEnable{};    while (!ULEBSizeErr && !MetadataDecodeErr && Cur &&           Cur.tell() < Content.size()) {      Version = Data.getU8(Cur);      if (!Cur)        break; -    if (Version < 2 || Version > 4) +    if (Version < 2 || Version > 5)        return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " +                           Twine(static_cast<int>(Version))); -    Feature = Data.getU8(Cur); // Feature byte +    Feature = Version < 5 ? Data.getU8(Cur) : Data.getU16(Cur);      if (!Cur)        break;      auto FeatEnableOrErr = BBAddrMap::Features::decode(Feature); @@ -858,6 +858,11 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,                           "basic block hash feature is enabled: version = " +                           Twine(static_cast<int>(Version)) +                           " feature = " + Twine(static_cast<int>(Feature))); +    if (FeatEnable.PostLinkCfg && Version < 5) +      return createError("version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when " +                         "post link cfg feature is enabled: version = " + +                         Twine(static_cast<int>(Version)) + +                         " feature = " + Twine(static_cast<int>(Feature)));      uint32_t NumBlocksInBBRange = 0;      uint32_t NumBBRanges = 1;      typename ELFFile<ELFT>::uintX_t RangeBaseAddress = 0; @@ -946,6 +951,10 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,          uint64_t BBF = FeatEnable.BBFreq                             ? readULEB128As<uint64_t>(Data, Cur, ULEBSizeErr)                             : 0; +        uint32_t PostLinkBBFreq = +            FeatEnable.PostLinkCfg +                ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) +                : 0;          // Branch probability          llvm::SmallVector<PGOAnalysisMap::PGOBBEntry::SuccessorEntry, 2> @@ -955,13 +964,20 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,            for (uint64_t I = 0; I < SuccCount; ++I) {              uint32_t BBID = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr);              uint32_t BrProb = readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr); +            uint32_t PostLinkFreq = +                FeatEnable.PostLinkCfg +                    ? readULEB128As<uint32_t>(Data, Cur, ULEBSizeErr) +                    : 0; +              if (PGOAnalyses) -              Successors.push_back({BBID, BranchProbability::getRaw(BrProb)}); +              Successors.push_back( +                  {BBID, BranchProbability::getRaw(BrProb), PostLinkFreq});            }          }          if (PGOAnalyses) -          PGOBBEntries.push_back({BlockFrequency(BBF), std::move(Successors)}); +          PGOBBEntries.push_back( +              {BlockFrequency(BBF), PostLinkBBFreq, std::move(Successors)});        }        if (PGOAnalyses) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 8b75fbe..8530785 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1465,13 +1465,19 @@ void ELFState<ELFT>::writeSectionContent(    for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) {      // Write version and feature values.      
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { -      if (E.Version > 4) +      if (E.Version > 5)          WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: "                               << static_cast<int>(E.Version)                               << "; encoding using the most recent version";        CBA.write(E.Version); -      CBA.write(E.Feature); -      SHeader.sh_size += 2; +      SHeader.sh_size += 1; +      if (E.Version < 5) { +        CBA.write(static_cast<uint8_t>(E.Feature)); +        SHeader.sh_size += 1; +      } else { +        CBA.write<uint16_t>(E.Feature, ELFT::Endianness); +        SHeader.sh_size += 2; +      }      }      auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature);      bool MultiBBRangeFeatureEnabled = false; @@ -1556,11 +1562,15 @@ void ELFState<ELFT>::writeSectionContent(      for (const auto &PGOBBE : PGOBBEntries) {        if (PGOBBE.BBFreq)          SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq); +      if (FeatureOrErr->PostLinkCfg || PGOBBE.PostLinkBBFreq.has_value()) +        SHeader.sh_size += CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0));        if (PGOBBE.Successors) {          SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size()); -        for (const auto &[ID, BrProb] : *PGOBBE.Successors) { +        for (const auto &[ID, BrProb, PostLinkBrFreq] : *PGOBBE.Successors) {            SHeader.sh_size += CBA.writeULEB128(ID);            SHeader.sh_size += CBA.writeULEB128(BrProb); +          if (FeatureOrErr->PostLinkCfg || PostLinkBrFreq.has_value()) +            SHeader.sh_size += CBA.writeULEB128(PostLinkBrFreq.value_or(0));          }        }      } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index f8a84b0..e5e5fc2 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1886,7 +1886,7 @@ void MappingTraits<ELFYAML::BBAddrMapEntry>::mapping(      IO &IO, ELFYAML::BBAddrMapEntry &E) {    assert(IO.getContext() && "The IO context is not initialized");    IO.mapRequired("Version", E.Version); -  IO.mapOptional("Feature", E.Feature, Hex8(0)); +  IO.mapOptional("Feature", E.Feature, Hex16(0));    IO.mapOptional("NumBBRanges", E.NumBBRanges);    IO.mapOptional("BBRanges", E.BBRanges);  } @@ -1920,6 +1920,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry>::mapping(      IO &IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &E) {    assert(IO.getContext() && "The IO context is not initialized");    IO.mapOptional("BBFreq", E.BBFreq); +  IO.mapOptional("PostLinkBBFreq", E.PostLinkBBFreq);    IO.mapOptional("Successors", E.Successors);  } @@ -1929,6 +1930,7 @@ void MappingTraits<ELFYAML::PGOAnalysisMapEntry::PGOBBEntry::SuccessorEntry>::    assert(IO.getContext() && "The IO context is not initialized");    IO.mapRequired("ID", E.ID);    IO.mapRequired("BrProb", E.BrProb); +  IO.mapOptional("PostLinkBrFreq", E.PostLinkBrFreq);  }  void MappingTraits<ELFYAML::GnuHashHeader>::mapping(IO &IO, diff --git a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp index 3b810d0..79863e1 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp @@ -34,7 +34,7 @@ class HexagonCopyHoisting : public MachineFunctionPass {  public:    static char ID; -  HexagonCopyHoisting() : MachineFunctionPass(ID), MFN(nullptr), MRI(nullptr) {} +  HexagonCopyHoisting() : MachineFunctionPass(ID) {}    StringRef getPassName() const override { return 
"Hexagon Copy Hoisting"; } @@ -56,8 +56,8 @@ public:    void moveCopyInstr(MachineBasicBlock *DestBB,                       std::pair<Register, Register> Key, MachineInstr *MI); -  MachineFunction *MFN; -  MachineRegisterInfo *MRI; +  MachineFunction *MFN = nullptr; +  MachineRegisterInfo *MRI = nullptr;    std::vector<DenseMap<std::pair<Register, Register>, MachineInstr *>>        CopyMIList;  }; diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp index 93418f7..a10c937 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp @@ -34,13 +34,13 @@ STATISTIC(HexagonNumStoreAbsConversions,  namespace {  class HexagonGenMemAbsolute : public MachineFunctionPass { -  const HexagonInstrInfo *TII; -  MachineRegisterInfo *MRI; -  const TargetRegisterInfo *TRI; +  const HexagonInstrInfo *TII = nullptr; +  MachineRegisterInfo *MRI = nullptr; +  const TargetRegisterInfo *TRI = nullptr;  public:    static char ID; -  HexagonGenMemAbsolute() : MachineFunctionPass(ID), TII(0), MRI(0), TRI(0) {} +  HexagonGenMemAbsolute() : MachineFunctionPass(ID) {}    StringRef getPassName() const override {      return "Hexagon Generate Load/Store Set Absolute Address Instruction"; diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 1637b91..d19920c 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -612,6 +612,9 @@ let Predicates = [UseHVX] in {             (V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>;    def: Pat<(VecQ32 (trunc HVI32:$Vs)),             (V6_vandvrt HvxVR:$Vs, (ToI32 0x01010101))>; +  def: Pat<(VecQ16 (trunc HWI32:$Vss)), +           (Combineq(VecQ32(V6_vandvrt (HiVec $Vss), (ToI32 0x01010101))), +           (VecQ32 (V6_vandvrt (LoVec $Vss), (ToI32 0x01010101))))>;  }  let Predicates = [UseHVX] in { diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp index 71bdfc66..5a85f34 100644 --- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp @@ -43,7 +43,7 @@ namespace {  class HexagonTfrCleanup : public MachineFunctionPass {  public:    static char ID; -  HexagonTfrCleanup() : MachineFunctionPass(ID), HII(0), TRI(0) {} +  HexagonTfrCleanup() : MachineFunctionPass(ID) {}    StringRef getPassName() const override { return "Hexagon TFR Cleanup"; }    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.setPreservesAll(); @@ -52,8 +52,8 @@ public:    bool runOnMachineFunction(MachineFunction &MF) override;  private: -  const HexagonInstrInfo *HII; -  const TargetRegisterInfo *TRI; +  const HexagonInstrInfo *HII = nullptr; +  const TargetRegisterInfo *TRI = nullptr;    typedef DenseMap<unsigned, uint64_t> ImmediateMap; diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 690dd73..e86b21c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -365,6 +365,7 @@ def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))),  // FP Rounding  let Predicates = [HasBasicF, IsLA64] in {  def : PatFpr<frint, FRINT_S, FPR32>; +def : PatFpr<flog2, FLOGB_S, FPR32>;  } // Predicates = [HasBasicF, IsLA64]  let Predicates = [HasBasicF, IsLA32] in { diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index daefbaa..2e88254 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -348,6 +348,7 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>;  // FP Rounding  let Predicates = [HasBasicD, IsLA64] in {  def : PatFpr<frint, FRINT_D, FPR64>; +def : PatFpr<flog2, FLOGB_D, FPR64>;  } // Predicates = [HasBasicD, IsLA64]  /// Pseudo-instructions needed for the soft-float ABI with LA32D diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 80c96c6..a6de839 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -244,8 +244,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,      setOperationAction(ISD::FP_TO_BF16, MVT::f32,                         Subtarget.isSoftFPABI() ? LibCall : Custom); -    if (Subtarget.is64Bit()) +    if (Subtarget.is64Bit()) {        setOperationAction(ISD::FRINT, MVT::f32, Legal); +      setOperationAction(ISD::FLOG2, MVT::f32, Legal); +    }      if (!Subtarget.hasBasicD()) {        setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); @@ -291,8 +293,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,      setOperationAction(ISD::FP_TO_BF16, MVT::f64,                         Subtarget.isSoftFPABI() ? LibCall : Custom); -    if (Subtarget.is64Bit()) +    if (Subtarget.is64Bit()) {        setOperationAction(ISD::FRINT, MVT::f64, Legal); +      setOperationAction(ISD::FLOG2, MVT::f64, Legal); +    }    }    // Set operations for 'LSX' feature. @@ -362,6 +366,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FMA, VT, Legal);        setOperationAction(ISD::FSQRT, VT, Legal);        setOperationAction(ISD::FNEG, VT, Legal); +      setOperationAction(ISD::FLOG2, VT, Legal);        setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,                           ISD::SETUGE, ISD::SETUGT},                          VT, Expand); @@ -443,6 +448,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,        setOperationAction(ISD::FMA, VT, Legal);        setOperationAction(ISD::FSQRT, VT, Legal);        setOperationAction(ISD::FNEG, VT, Legal); +      setOperationAction(ISD::FLOG2, VT, Legal);        setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,                           ISD::SETUGE, ISD::SETUGT},                          VT, Expand); diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 613dea6..ca4ee5f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1593,6 +1593,9 @@ def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),  // XVFSQRT_{S/D}  defm : PatXrF<fsqrt, "XVFSQRT">; +// XVFLOGB_{S/D} +defm : PatXrF<flog2, "XVFLOGB">; +  // XVRECIP_{S/D}  def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),            (XVFRECIP_S v8f32:$xj)>; @@ -2024,6 +2027,24 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),                 (XVFTINTRZ_LU_D v4f64:$vj)),                sub_128)>; +// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU} +defm : VAvgPat<sra, "XVAVG_B", v32i8>; +defm : VAvgPat<sra, "XVAVG_H", v16i16>; +defm : VAvgPat<sra, "XVAVG_W", v8i32>; +defm : VAvgPat<sra, "XVAVG_D", 
v4i64>; +defm : VAvgPat<srl, "XVAVG_BU", v32i8>; +defm : VAvgPat<srl, "XVAVG_HU", v16i16>; +defm : VAvgPat<srl, "XVAVG_WU", v8i32>; +defm : VAvgPat<srl, "XVAVG_DU", v4i64>; +defm : VAvgrPat<sra, "XVAVGR_B", v32i8>; +defm : VAvgrPat<sra, "XVAVGR_H", v16i16>; +defm : VAvgrPat<sra, "XVAVGR_W", v8i32>; +defm : VAvgrPat<sra, "XVAVGR_D", v4i64>; +defm : VAvgrPat<srl, "XVAVGR_BU", v32i8>; +defm : VAvgrPat<srl, "XVAVGR_HU", v16i16>; +defm : VAvgrPat<srl, "XVAVGR_WU", v8i32>; +defm : VAvgrPat<srl, "XVAVGR_DU", v4i64>; +  // abs  def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>;  def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 4619c6b..92402ba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1518,6 +1518,18 @@ multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {    }  } +multiclass VAvgPat<SDPatternOperator OpNode, string Inst, ValueType vt> { +  def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))), +            (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>; +} + +multiclass VAvgrPat<SDPatternOperator OpNode, string Inst, ValueType vt> { +  def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), +                             (vt (vsplat_imm_eq_1)))), +                    (vt (vsplat_imm_eq_1))), +            (!cast<LAInst>(Inst) vt:$vj, vt:$vk)>; +} +  let Predicates = [HasExtLSX] in {  // VADD_{B/H/W/D} @@ -1783,6 +1795,9 @@ def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va),  // VFSQRT_{S/D}  defm : PatVrF<fsqrt, "VFSQRT">; +// VFLOGB_{S/D} +defm : PatVrF<flog2, "VFLOGB">; +  // VFRECIP_{S/D}  def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj),            (VFRECIP_S v4f32:$vj)>; @@ -2154,6 +2169,24 @@ def : Pat<(f32 f32imm_vldi:$in),  def : Pat<(f64 f64imm_vldi:$in),            (f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>; +// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU} +defm : VAvgPat<sra, "VAVG_B", v16i8>; +defm : VAvgPat<sra, "VAVG_H", v8i16>; +defm : VAvgPat<sra, "VAVG_W", v4i32>; +defm : VAvgPat<sra, "VAVG_D", v2i64>; +defm : VAvgPat<srl, "VAVG_BU", v16i8>; +defm : VAvgPat<srl, "VAVG_HU", v8i16>; +defm : VAvgPat<srl, "VAVG_WU", v4i32>; +defm : VAvgPat<srl, "VAVG_DU", v2i64>; +defm : VAvgrPat<sra, "VAVGR_B", v16i8>; +defm : VAvgrPat<sra, "VAVGR_H", v8i16>; +defm : VAvgrPat<sra, "VAVGR_W", v4i32>; +defm : VAvgrPat<sra, "VAVGR_D", v2i64>; +defm : VAvgrPat<srl, "VAVGR_BU", v16i8>; +defm : VAvgrPat<srl, "VAVGR_HU", v8i16>; +defm : VAvgrPat<srl, "VAVGR_WU", v4i32>; +defm : VAvgrPat<srl, "VAVGR_DU", v2i64>; +  // abs  def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>;  def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>; diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index 000d296..4ff489d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -296,8 +296,9 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,                                     std::optional<Reloc::Model> RM,                                     std::optional<CodeModel::Model> CM,                                     CodeGenOptLevel OL, bool JIT) -    : CodeGenTargetMachineImpl(T, TT.computeDataLayout(), TT, CPU, -                               computeFSAdditions(FS, OL, TT), Options, +    
: CodeGenTargetMachineImpl(T, +                               TT.computeDataLayout(Options.MCOptions.ABIName), +                               TT, CPU, computeFSAdditions(FS, OL, TT), Options,                                 getEffectiveRelocModel(TT, RM),                                 getEffectivePPCCodeModel(TT, CM, JIT), OL),        TLOF(createTLOF(getTargetTriple())), diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 8198173..282cf5d 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -92,6 +92,10 @@ private:    void emitFence(AtomicOrdering FenceOrdering, SyncScope::ID FenceSSID,                   MachineIRBuilder &MIB) const;    bool selectUnmergeValues(MachineInstr &MI, MachineIRBuilder &MIB) const; +  void addVectorLoadStoreOperands(MachineInstr &I, +                                  SmallVectorImpl<SrcOp> &SrcOps, +                                  unsigned &CurOp, bool IsMasked, +                                  bool IsStrided) const;    bool selectIntrinsicWithSideEffects(MachineInstr &I,                                        MachineIRBuilder &MIB) const; @@ -716,6 +720,26 @@ static unsigned selectRegImmLoadStoreOp(unsigned GenericOpc, unsigned OpSize) {    return GenericOpc;  } +void RISCVInstructionSelector::addVectorLoadStoreOperands( +    MachineInstr &I, SmallVectorImpl<SrcOp> &SrcOps, unsigned &CurOp, +    bool IsMasked, bool IsStrided) const { +  // Base Pointer +  auto PtrReg = I.getOperand(CurOp++).getReg(); +  SrcOps.push_back(PtrReg); + +  // Stride +  if (IsStrided) { +    auto StrideReg = I.getOperand(CurOp++).getReg(); +    SrcOps.push_back(StrideReg); +  } + +  // Mask +  if (IsMasked) { +    auto MaskReg = I.getOperand(CurOp++).getReg(); +    SrcOps.push_back(MaskReg); +  } +} +  bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(      MachineInstr &I, MachineIRBuilder &MIB) const {    // Find the intrinsic ID. 
@@ -752,21 +776,7 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(        SrcOps.push_back(Register(RISCV::NoRegister));      } -    // Base Pointer -    auto PtrReg = I.getOperand(CurOp++).getReg(); -    SrcOps.push_back(PtrReg); - -    // Stride -    if (IsStrided) { -      auto StrideReg = I.getOperand(CurOp++).getReg(); -      SrcOps.push_back(StrideReg); -    } - -    // Mask -    if (IsMasked) { -      auto MaskReg = I.getOperand(CurOp++).getReg(); -      SrcOps.push_back(MaskReg); -    } +    addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided);      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT));      const RISCV::VLEPseudo *P = @@ -795,6 +805,48 @@ bool RISCVInstructionSelector::selectIntrinsicWithSideEffects(      I.eraseFromParent();      return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI);    } +  case Intrinsic::riscv_vsm: +  case Intrinsic::riscv_vse: +  case Intrinsic::riscv_vse_mask: +  case Intrinsic::riscv_vsse: +  case Intrinsic::riscv_vsse_mask: { +    bool IsMasked = IntrinID == Intrinsic::riscv_vse_mask || +                    IntrinID == Intrinsic::riscv_vsse_mask; +    bool IsStrided = IntrinID == Intrinsic::riscv_vsse || +                     IntrinID == Intrinsic::riscv_vsse_mask; +    LLT VT = MRI->getType(I.getOperand(1).getReg()); +    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); + +    // Sources +    unsigned CurOp = 1; +    SmallVector<SrcOp, 4> SrcOps; // Source registers. + +    // Store value +    auto PassthruReg = I.getOperand(CurOp++).getReg(); +    SrcOps.push_back(PassthruReg); + +    addVectorLoadStoreOperands(I, SrcOps, CurOp, IsMasked, IsStrided); + +    RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(VT)); +    const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( +        IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); + +    auto PseudoMI = MIB.buildInstr(P->Pseudo, {}, SrcOps); + +    // Select VL +    auto VLOpFn = renderVLOp(I.getOperand(CurOp++)); +    for (auto &RenderFn : *VLOpFn) +      RenderFn(PseudoMI); + +    // SEW +    PseudoMI.addImm(Log2SEW); + +    // Memref +    PseudoMI.cloneMemRefs(I); + +    I.eraseFromParent(); +    return constrainSelectedInstRegOperands(*PseudoMI, TII, TRI, RBI); +  }    }  } diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 4105618..526675a 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -127,6 +127,10 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,    case RISCV::PseudoCCAND:    case RISCV::PseudoCCOR:    case RISCV::PseudoCCXOR: +  case RISCV::PseudoCCMAX: +  case RISCV::PseudoCCMAXU: +  case RISCV::PseudoCCMIN: +  case RISCV::PseudoCCMINU:    case RISCV::PseudoCCADDW:    case RISCV::PseudoCCSUBW:    case RISCV::PseudoCCSLL: @@ -217,6 +221,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,          .addImm(0);    } else {      unsigned NewOpc; +    // clang-format off      switch (MI.getOpcode()) {      default:        llvm_unreachable("Unexpected opcode!"); @@ -228,6 +233,10 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,      case RISCV::PseudoCCAND:   NewOpc = RISCV::AND;   break;      case RISCV::PseudoCCOR:    NewOpc = RISCV::OR;    break;      case RISCV::PseudoCCXOR:   NewOpc = RISCV::XOR;   break; +    case RISCV::PseudoCCMAX:   NewOpc = RISCV::MAX;   break; +    case RISCV::PseudoCCMIN:   NewOpc = RISCV::MIN;   
break; +    case RISCV::PseudoCCMAXU:  NewOpc = RISCV::MAXU;  break; +    case RISCV::PseudoCCMINU:  NewOpc = RISCV::MINU;  break;      case RISCV::PseudoCCADDI:  NewOpc = RISCV::ADDI;  break;      case RISCV::PseudoCCSLLI:  NewOpc = RISCV::SLLI;  break;      case RISCV::PseudoCCSRLI:  NewOpc = RISCV::SRLI;  break; @@ -250,6 +259,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,      case RISCV::PseudoCCNDS_BFOS: NewOpc = RISCV::NDS_BFOS; break;      case RISCV::PseudoCCNDS_BFOZ: NewOpc = RISCV::NDS_BFOZ; break;      } +    // clang-format on      if (NewOpc == RISCV::NDS_BFOZ || NewOpc == RISCV::NDS_BFOS) {        BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index b4556f6..cfee6ab 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1851,6 +1851,11 @@ def TuneShortForwardBranchOpt  def HasShortForwardBranchOpt : Predicate<"Subtarget->hasShortForwardBranchOpt()">;  def NoShortForwardBranchOpt : Predicate<"!Subtarget->hasShortForwardBranchOpt()">; +def TuneShortForwardBranchIMinMax +    : SubtargetFeature<"short-forward-branch-i-minmax", "HasShortForwardBranchIMinMax", +                       "true", "Enable short forward branch optimization for min,max instructions in Zbb", +                       [TuneShortForwardBranchOpt]>; +  // Some subtargets require a S2V transfer buffer to move scalars into vectors.  // FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.  def TuneNoSinkSplatOperands diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 912b82d..3a7013d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1699,6 +1699,10 @@ unsigned getPredicatedOpcode(unsigned Opcode) {    case RISCV::AND:   return RISCV::PseudoCCAND;    case RISCV::OR:    return RISCV::PseudoCCOR;    case RISCV::XOR:   return RISCV::PseudoCCXOR; +  case RISCV::MAX:   return RISCV::PseudoCCMAX; +  case RISCV::MAXU:  return RISCV::PseudoCCMAXU; +  case RISCV::MIN:   return RISCV::PseudoCCMIN; +  case RISCV::MINU:  return RISCV::PseudoCCMINU;    case RISCV::ADDI:  return RISCV::PseudoCCADDI;    case RISCV::SLLI:  return RISCV::PseudoCCSLLI; @@ -1735,7 +1739,8 @@ unsigned getPredicatedOpcode(unsigned Opcode) {  /// return the defining instruction.  static MachineInstr *canFoldAsPredicatedOp(Register Reg,                                             const MachineRegisterInfo &MRI, -                                           const TargetInstrInfo *TII) { +                                           const TargetInstrInfo *TII, +                                           const RISCVSubtarget &STI) {    if (!Reg.isVirtual())      return nullptr;    if (!MRI.hasOneNonDBGUse(Reg)) @@ -1743,6 +1748,12 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,    MachineInstr *MI = MRI.getVRegDef(Reg);    if (!MI)      return nullptr; + +  if (!STI.hasShortForwardBranchIMinMax() && +      (MI->getOpcode() == RISCV::MAX || MI->getOpcode() == RISCV::MIN || +       MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU)) +    return nullptr; +    // Check if MI can be predicated and folded into the CCMOV.    
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)      return nullptr; @@ -1806,10 +1817,10 @@ RISCVInstrInfo::optimizeSelect(MachineInstr &MI,    MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();    MachineInstr *DefMI = -      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this); +      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this, STI);    bool Invert = !DefMI;    if (!DefMI) -    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this); +    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this, STI);    if (!DefMI)      return nullptr; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index 0114fbd..5a67a5a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -106,6 +106,10 @@ def PseudoCCSRA : SFBALU_rr;  def PseudoCCAND : SFBALU_rr;  def PseudoCCOR  : SFBALU_rr;  def PseudoCCXOR : SFBALU_rr; +def PseudoCCMAX : SFBALU_rr; +def PseudoCCMIN : SFBALU_rr; +def PseudoCCMAXU : SFBALU_rr; +def PseudoCCMINU : SFBALU_rr;  def PseudoCCADDI : SFBALU_ri;  def PseudoCCANDI : SFBALU_ri; diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp index d765d9c..d735923 100644 --- a/llvm/lib/TargetParser/TargetDataLayout.cpp +++ b/llvm/lib/TargetParser/TargetDataLayout.cpp @@ -208,7 +208,7 @@ static std::string computeMipsDataLayout(const Triple &TT, StringRef ABIName) {    return Ret;  } -static std::string computePowerDataLayout(const Triple &T) { +static std::string computePowerDataLayout(const Triple &T, StringRef ABIName) {    bool is64Bit = T.isPPC64();    std::string Ret; @@ -228,7 +228,8 @@ static std::string computePowerDataLayout(const Triple &T) {    // If the target ABI uses function descriptors, then the alignment of function    // pointers depends on the alignment used to emit the descriptor. Otherwise,    // function pointers are aligned to 32 bits because the instructions must be. -  if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) { +  if ((T.getArch() == Triple::ppc64 && +       (!T.isPPC64ELFv2ABI() && ABIName != "elfv2"))) {      Ret += "-Fi64";    } else if (T.isOSAIX()) {      Ret += is64Bit ? "-Fi64" : "-Fi32"; @@ -573,7 +574,7 @@ std::string Triple::computeDataLayout(StringRef ABIName) const {    case Triple::ppcle:    case Triple::ppc64:    case Triple::ppc64le: -    return computePowerDataLayout(*this); +    return computePowerDataLayout(*this, ABIName);    case Triple::r600:    case Triple::amdgcn:      return computeAMDDataLayout(*this); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b03fb62..7f6d779 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5977,14 +5977,14 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,    }    // Prune obsolete incoming values off the successors' PHI nodes. 
-  for (auto BBI = Dest->begin(); isa<PHINode>(BBI); ++BBI) { +  for (auto &PHI : make_early_inc_range(Dest->phis())) {      unsigned PreviousEdges = Cases->size();      if (Dest == SI->getDefaultDest())        ++PreviousEdges;      for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) -      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); +      PHI.removeIncomingValue(SI->getParent());    } -  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) { +  for (auto &PHI : make_early_inc_range(OtherDest->phis())) {      unsigned PreviousEdges = OtherCases->size();      if (OtherDest == SI->getDefaultDest())        ++PreviousEdges; @@ -5993,7 +5993,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,      if (NewBI->isUnconditional())        ++E;      for (unsigned I = 0; I != E; ++I) -      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent()); +      PHI.removeIncomingValue(SI->getParent());    }    // Clean up the default block - it may have phis or other instructions before diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 505fb43..25bf49d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3908,7 +3908,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(          continue;        VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind, -                            *CM.PSE.getSE()); +                            *CM.PSE.getSE(), OrigLoop);        precomputeCosts(*Plan, VF, CostCtx);        auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry());        for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { @@ -4166,7 +4166,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {        // Add on other costs that are modelled in VPlan, but not in the legacy        // cost model.        VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind, -                            *CM.PSE.getSE()); +                            *CM.PSE.getSE(), OrigLoop);        VPRegionBlock *VectorRegion = P->getVectorLoopRegion();        assert(VectorRegion && "Expected to have a vector region!");        for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( @@ -6876,7 +6876,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,  InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,                                                 ElementCount VF) const { -  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE()); +  VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(), +                        OrigLoop);    InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx);    // Now compute and add the VPlan-based cost. @@ -7110,12 +7111,13 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {    // case, don't trigger the assertion, as the extra simplifications may cause a    // different VF to be picked by the VPlan-based cost model.    VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind, -                        *CM.PSE.getSE()); +                        *CM.PSE.getSE(), OrigLoop);    precomputeCosts(BestPlan, BestFactor.Width, CostCtx);    // Verify that the VPlan-based and legacy cost models agree, except for VPlans    // with early exits and plans with additional VPlan simplifications. The    // legacy cost model doesn't properly model costs for such loops.    
assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() || +          !Legal->getLAI()->getSymbolicStrides().empty() ||            planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),                                                  CostCtx, OrigLoop,                                                  BestFactor.Width) || @@ -8441,7 +8443,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(    // and mulacc-reduction are implemented.    if (!CM.foldTailWithEVL()) {      VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind, -                          *CM.PSE.getSE()); +                          *CM.PSE.getSE(), OrigLoop);      VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan,                               CostCtx, Range);    } @@ -9911,7 +9913,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {      bool ForceVectorization =          Hints.getForce() == LoopVectorizeHints::FK_Enabled;      VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM, -                          CM.CostKind, *CM.PSE.getSE()); +                          CM.CostKind, *CM.PSE.getSE(), L);      if (!ForceVectorization &&          !isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx,                                       LVP.getPlanFor(VF.Width), SEL, diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 2aaabd9..965426f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -350,13 +350,14 @@ struct VPCostContext {    SmallPtrSet<Instruction *, 8> SkipCostComputation;    TargetTransformInfo::TargetCostKind CostKind;    ScalarEvolution &SE; +  const Loop *L;    VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,                  const VPlan &Plan, LoopVectorizationCostModel &CM,                  TargetTransformInfo::TargetCostKind CostKind, -                ScalarEvolution &SE) +                ScalarEvolution &SE, const Loop *L)        : TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM), -        CostKind(CostKind), SE(SE) {} +        CostKind(CostKind), SE(SE), L(L) {}    /// Return the cost for \p UI with \p VF using the legacy cost model as    /// fallback until computing the cost of all recipes migrates to VPlan. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 9a63c80..bde62dd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3167,26 +3167,30 @@ bool VPReplicateRecipe::shouldPack() const {    });  } -/// Returns true if \p Ptr is a pointer computation for which the legacy cost -/// model computes a SCEV expression when computing the address cost. -static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) { +/// Returns a SCEV expression for \p Ptr if it is a pointer computation for +/// which the legacy cost model computes a SCEV expression when computing the +/// address cost. Computing SCEVs for VPValues is incomplete and returns +/// SCEVCouldNotCompute in cases the legacy cost model can compute SCEVs. In +/// those cases we fall back to the legacy cost model. Otherwise return nullptr. 
+static const SCEV *getAddressAccessSCEV(const VPValue *Ptr, ScalarEvolution &SE, +                                        const Loop *L) {    auto *PtrR = Ptr->getDefiningRecipe();    if (!PtrR || !((isa<VPReplicateRecipe>(PtrR) &&                    cast<VPReplicateRecipe>(PtrR)->getOpcode() ==                        Instruction::GetElementPtr) ||                   isa<VPWidenGEPRecipe>(PtrR) ||                   match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue())))) -    return false; +    return nullptr;    // We are looking for a GEP where all indices are either loop invariant or    // inductions.    for (VPValue *Opd : drop_begin(PtrR->operands())) {      if (!Opd->isDefinedOutsideLoopRegions() &&          !isa<VPScalarIVStepsRecipe, VPWidenIntOrFpInductionRecipe>(Opd)) -      return false; +      return nullptr;    } -  return true; +  return vputils::getSCEVExprForVPValue(Ptr, SE, L);  }  /// Returns true if \p V is used as part of the address of another load or @@ -3354,9 +3358,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,      bool IsLoad = UI->getOpcode() == Instruction::Load;      const VPValue *PtrOp = getOperand(!IsLoad); -    // TODO: Handle cases where we need to pass a SCEV to -    // getAddressComputationCost. -    if (shouldUseAddressAccessSCEV(PtrOp)) +    const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.SE, Ctx.L); +    if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV))        break;      Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0)); @@ -3374,7 +3377,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,      InstructionCost ScalarCost =          ScalarMemOpCost + Ctx.TTI.getAddressComputationCost(                                PtrTy, UsedByLoadStoreAddress ? 
nullptr : &Ctx.SE, -                              nullptr, Ctx.CostKind); +                              PtrSCEV, Ctx.CostKind);      if (isSingleScalar())        return ScalarCost; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4d98014..986c801 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1419,6 +1419,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {                                            true /*IsSingleScalar*/);        Clone->insertBefore(RepOrWidenR);        RepOrWidenR->replaceAllUsesWith(Clone); +      if (isDeadRecipe(*RepOrWidenR)) +        RepOrWidenR->eraseFromParent();      }    }  } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 4db92e7..8c23e78 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -75,7 +75,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {           B == Plan.getBackedgeTakenCount();  } -const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { +const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V, +                                           ScalarEvolution &SE, const Loop *L) {    if (V->isLiveIn()) {      if (Value *LiveIn = V->getLiveInIRValue())        return SE.getSCEV(LiveIn); @@ -86,6 +87,53 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) {    return TypeSwitch<const VPRecipeBase *, const SCEV *>(V->getDefiningRecipe())        .Case<VPExpandSCEVRecipe>(            [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); }) +      .Case<VPCanonicalIVPHIRecipe>([&SE, L](const VPCanonicalIVPHIRecipe *R) { +        if (!L) +          return SE.getCouldNotCompute(); +        const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); +        return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L, +                                SCEV::FlagAnyWrap); +      }) +      .Case<VPDerivedIVRecipe>([&SE, L](const VPDerivedIVRecipe *R) { +        const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); +        const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L); +        const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L); +        if (any_of(ArrayRef({Start, IV, Scale}), IsaPred<SCEVCouldNotCompute>)) +          return SE.getCouldNotCompute(); + +        return SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()), +                             SE.getMulExpr(IV, SE.getTruncateOrSignExtend( +                                                   Scale, IV->getType()))); +      }) +      .Case<VPScalarIVStepsRecipe>([&SE, L](const VPScalarIVStepsRecipe *R) { +        const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L); +        const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L); +        if (isa<SCEVCouldNotCompute>(IV) || isa<SCEVCouldNotCompute>(Step) || +            !Step->isOne()) +          return SE.getCouldNotCompute(); +        return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()), +                             Step); +      }) +      .Case<VPReplicateRecipe>([&SE, L](const VPReplicateRecipe *R) { +        if (R->getOpcode() != Instruction::GetElementPtr) +          return SE.getCouldNotCompute(); + +        const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L); +        if 
(isa<SCEVCouldNotCompute>(Base)) +          return SE.getCouldNotCompute(); + +        SmallVector<const SCEV *> IndexExprs; +        for (VPValue *Index : drop_begin(R->operands())) { +          const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L); +          if (isa<SCEVCouldNotCompute>(IndexExpr)) +            return SE.getCouldNotCompute(); +          IndexExprs.push_back(IndexExpr); +        } + +        Type *SrcElementTy = cast<GetElementPtrInst>(R->getUnderlyingInstr()) +                                 ->getSourceElementType(); +        return SE.getGEPExpr(Base, IndexExprs, SrcElementTy); +      })        .Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); });  } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 37cd413..c21a0e7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -37,7 +37,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr);  /// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no  /// SCEV expression could be constructed. -const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE); +const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, +                                  const Loop *L = nullptr);  /// Returns true if \p VPV is a single scalar, either because it produces the  /// same value for all lanes or only has its first lane used. | 
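
The VAvgPat/VAvgrPat multiclasses added for LSX/LASX above map averaging idioms in the DAG onto single VAVG/VAVGR (and XVAVG/XVAVGR) instructions. As a minimal scalar sketch of the shapes the patterns match (plain C++, not LLVM code; the function names and the 8-bit element width are illustrative assumptions), VAvgPat matches a lane-wide wrapping add followed by a shift by a splatted 1, and VAvgrPat adds 1 before the shift:

#include <cstdint>
#include <cstdio>

// VAVG_B-style lane: (sra (add a, b), 1) -- a wrapping 8-bit add followed by
// an arithmetic shift; a floor average when the 8-bit sum does not wrap.
static int8_t avg_s8(int8_t a, int8_t b) {
  return static_cast<int8_t>(static_cast<int8_t>(a + b) >> 1);
}

// VAVG_BU-style lane: (srl (add a, b), 1) -- the carry out of the 8-bit add
// is lost before the logical shift.
static uint8_t avg_u8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>(static_cast<uint8_t>(a + b) >> 1);
}

// VAVGR_B-style lane: (sra (add (add a, b), 1), 1) -- a rounding average.
static int8_t avgr_s8(int8_t a, int8_t b) {
  return static_cast<int8_t>(static_cast<int8_t>(a + b + 1) >> 1);
}

int main() {
  std::printf("avg_s8(7, 4)    = %d\n", avg_s8(7, 4));     // (7 + 4) >> 1 = 5
  std::printf("avgr_s8(7, 4)   = %d\n", avgr_s8(7, 4));    // (7 + 4 + 1) >> 1 = 6
  std::printf("avg_u8(200, 59) = %u\n", avg_u8(200, 59));  // (259 mod 256) >> 1 = 1
  return 0;
}

The unsigned variants of the patterns use srl and the signed ones sra; that shift choice is the only difference between the _BU/_HU/_WU/_DU and _B/_H/_W/_D definitions.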

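Several hunks above (ELF.cpp, ELFEmitter.cpp, ELFYAML.cpp) move SHT_LLVM_BB_ADDR_MAP to version 5, whose feature field is a 16-bit word instead of the single byte used by versions 2 through 4. The following is a minimal, self-contained sketch of that header layout; the struct and function names are illustrative, not the llvm::object API, and little-endian byte order is assumed for the two-byte form:

#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

struct BBAddrMapHeader {
  uint8_t Version;
  uint16_t Feature;
};

// Reads the version byte, then a 1-byte feature field for versions 2-4 or a
// 2-byte feature word for version 5.
static std::optional<BBAddrMapHeader> readHeader(const std::vector<uint8_t> &Buf) {
  if (Buf.size() < 2)
    return std::nullopt;
  BBAddrMapHeader H;
  H.Version = Buf[0];
  if (H.Version < 2 || H.Version > 5) // unsupported version
    return std::nullopt;
  if (H.Version < 5) {
    H.Feature = Buf[1]; // single feature byte
  } else {
    if (Buf.size() < 3)
      return std::nullopt;
    H.Feature = static_cast<uint16_t>(Buf[1]) |
                (static_cast<uint16_t>(Buf[2]) << 8); // 16-bit feature word
  }
  return H;
}

int main() {
  std::vector<uint8_t> V4 = {4, 0x20};       // version 4: 8-bit feature field
  std::vector<uint8_t> V5 = {5, 0x20, 0x01}; // version 5: 16-bit feature field
  if (auto H = readHeader(V4))
    std::printf("v%u feature 0x%04x\n", unsigned(H->Version), unsigned(H->Feature));
  if (auto H = readHeader(V5))
    std::printf("v%u feature 0x%04x\n", unsigned(H->Version), unsigned(H->Feature));
  return 0;
}

Readers keep accepting the one-byte form for versions 2 through 4, so existing producers remain compatible; only version-5 sections carry the wider flag word needed for the new feature bits.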