diff options
Diffstat (limited to 'llvm/lib/CodeGen')
22 files changed, 417 insertions, 115 deletions
| diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fefde64f..8aa488f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -41,6 +41,7 @@  #include "llvm/CodeGen/GCMetadataPrinter.h"  #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"  #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockHashInfo.h"  #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"  #include "llvm/CodeGen/MachineConstantPool.h"  #include "llvm/CodeGen/MachineDominators.h" @@ -184,6 +185,8 @@ static cl::opt<bool> PrintLatency(      cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden,      cl::init(false)); +extern cl::opt<bool> EmitBBHash; +  STATISTIC(EmittedInsts, "Number of machine instrs printed");  char AsmPrinter::ID = 0; @@ -474,6 +477,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<GCModuleInfo>();    AU.addRequired<LazyMachineBlockFrequencyInfoPass>();    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); +  if (EmitBBHash) +    AU.addRequired<MachineBlockHashInfo>();  }  bool AsmPrinter::doInitialization(Module &M) { @@ -1434,14 +1439,11 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,          "BB entries info is required for BBFreq and BrProb "          "features");    } -  return {FuncEntryCountEnabled, -          BBFreqEnabled, -          BrProbEnabled, +  return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled,            MF.hasBBSections() && NumMBBSectionRanges > 1,            // Use static_cast to avoid breakage of tests on windows. 
-          static_cast<bool>(BBAddrMapSkipEmitBBEntries), -          HasCalls, -          false}; +          static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls, +          static_cast<bool>(EmitBBHash)};  }  void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { @@ -1500,6 +1502,9 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {        PrevMBBEndSymbol = MBBSymbol;      } +    auto MBHI = +        Features.BBHash ? &getAnalysis<MachineBlockHashInfo>() : nullptr; +      if (!Features.OmitBBEntries) {        OutStreamer->AddComment("BB id");        // Emit the BB ID for this basic block. @@ -1527,6 +1532,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {        emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), CurrentLabel);        // Emit the Metadata.        OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); +      // Emit the Hash. +      if (MBHI) { +        OutStreamer->emitInt64(MBHI->getMBBHash(MBB)); +      }      }      PrevMBBEndSymbol = MBB.getEndSymbol();    } diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index b6872605..4373c53 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen    LowerEmuTLS.cpp    MachineBasicBlock.cpp    MachineBlockFrequencyInfo.cpp +  MachineBlockHashInfo.cpp    MachineBlockPlacement.cpp    MachineBranchProbabilityInfo.cpp    MachineCFGPrinter.cpp diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index 2b5ced3..f44eb22 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -1108,8 +1108,8 @@ public:  };  } // namespace -ExpandFpPass::ExpandFpPass(const TargetMachine *TM, CodeGenOptLevel OptLevel) -    : TM(TM), OptLevel(OptLevel) {} +ExpandFpPass::ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel) +    : TM(&TM), OptLevel(OptLevel) {}  void ExpandFpPass::printPipeline(      
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b425b95..1f10478 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -391,19 +391,6 @@ void CombinerHelper::applyCombineConcatVectors(    MI.eraseFromParent();  } -bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const { -  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && -         "Invalid instruction"); -  auto &Shuffle = cast<GShuffleVector>(MI); - -  Register SrcVec1 = Shuffle.getSrc1Reg(); -  Register SrcVec2 = Shuffle.getSrc2Reg(); - -  LLT SrcVec1Type = MRI.getType(SrcVec1); -  LLT SrcVec2Type = MRI.getType(SrcVec2); -  return SrcVec1Type.isVector() && SrcVec2Type.isVector(); -} -  void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {    auto &Shuffle = cast<GShuffleVector>(MI); @@ -535,11 +522,9 @@ bool CombinerHelper::matchCombineShuffleVector(    LLT DstType = MRI.getType(MI.getOperand(0).getReg());    Register Src1 = MI.getOperand(1).getReg();    LLT SrcType = MRI.getType(Src1); -  // As bizarre as it may look, shuffle vector can actually produce -  // scalar! This is because at the IR level a <1 x ty> shuffle -  // vector is perfectly valid. -  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; -  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; + +  unsigned DstNumElts = DstType.getNumElements(); +  unsigned SrcNumElts = SrcType.getNumElements();    // If the resulting vector is smaller than the size of the source    // vectors being concatenated, we won't be able to replace the @@ -556,7 +541,7 @@ bool CombinerHelper::matchCombineShuffleVector(    //    // TODO: If the size between the source and destination don't match    //       we could still emit an extract vector element in that case. 
-  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) +  if (DstNumElts < 2 * SrcNumElts)      return false;    // Check that the shuffle mask can be broken evenly between the @@ -619,39 +604,6 @@ void CombinerHelper::applyCombineShuffleVector(    MI.eraseFromParent();  } -bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) const { -  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && -         "Invalid instruction kind"); - -  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); -  return Mask.size() == 1; -} - -void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) const { -  Register DstReg = MI.getOperand(0).getReg(); -  Builder.setInsertPt(*MI.getParent(), MI); - -  int I = MI.getOperand(3).getShuffleMask()[0]; -  Register Src1 = MI.getOperand(1).getReg(); -  LLT Src1Ty = MRI.getType(Src1); -  int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; -  Register SrcReg; -  if (I >= Src1NumElts) { -    SrcReg = MI.getOperand(2).getReg(); -    I -= Src1NumElts; -  } else if (I >= 0) -    SrcReg = Src1; - -  if (I < 0) -    Builder.buildUndef(DstReg); -  else if (!MRI.getType(SrcReg).isVector()) -    Builder.buildCopy(DstReg, SrcReg); -  else -    Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I); - -  MI.eraseFromParent(); -} -  namespace {  /// Select a preference between two uses. CurrentUse is the current preference @@ -8369,7 +8321,7 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI,      return false;    ArrayRef<int> Mask = Shuffle.getMask(); -  const unsigned NumSrcElems = Src1Ty.isVector() ? 
Src1Ty.getNumElements() : 1; +  const unsigned NumSrcElems = Src1Ty.getNumElements();    bool TouchesSrc1 = false;    bool TouchesSrc2 = false; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 04d9309..d6f23b6 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -602,6 +602,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,                           Depth + 1);      computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,                           Depth + 1); +    OffsetKnown = OffsetKnown.sext(BitWidth); +    WidthKnown = WidthKnown.sext(BitWidth);      Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);      // Sign extend the extracted value using shift left and arithmetic shift      // right. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b49040b..1fc90d0 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3359,6 +3359,54 @@ bool IRTranslator::translateShuffleVector(const User &U,      Mask = SVI->getShuffleMask();    else      Mask = cast<ConstantExpr>(U).getShuffleMask(); + +  // As GISel does not represent <1 x > vectors as a separate type from scalars, +  // we transform shuffle_vector with a scalar output to an +  // ExtractVectorElement. If the input type is also scalar it becomes a Copy. 
+  unsigned DstElts = cast<FixedVectorType>(U.getType())->getNumElements(); +  unsigned SrcElts = +      cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements(); +  if (DstElts == 1) { +    unsigned M = Mask[0]; +    if (SrcElts == 1) { +      if (M == 0 || M == 1) +        return translateCopy(U, *U.getOperand(M), MIRBuilder); +      MIRBuilder.buildUndef(getOrCreateVReg(U)); +    } else { +      Register Dst = getOrCreateVReg(U); +      if (M < SrcElts) { +        MIRBuilder.buildExtractVectorElementConstant( +            Dst, getOrCreateVReg(*U.getOperand(0)), M); +      } else if (M < SrcElts * 2) { +        MIRBuilder.buildExtractVectorElementConstant( +            Dst, getOrCreateVReg(*U.getOperand(1)), M - SrcElts); +      } else { +        MIRBuilder.buildUndef(Dst); +      } +    } +    return true; +  } + +  // A single element src is transformed to a build_vector. +  if (SrcElts == 1) { +    SmallVector<Register> Ops; +    Register Undef; +    for (int M : Mask) { +      LLT SrcTy = getLLTForType(*U.getOperand(0)->getType(), *DL); +      if (M == 0 || M == 1) { +        Ops.push_back(getOrCreateVReg(*U.getOperand(M))); +      } else { +        if (!Undef.isValid()) { +          Undef = MRI->createGenericVirtualRegister(SrcTy); +          MIRBuilder.buildUndef(Undef); +        } +        Ops.push_back(Undef); +      } +    } +    MIRBuilder.buildBuildVector(getOrCreateVReg(U), Ops); +    return true; +  } +    ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);    MIRBuilder        .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)}, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 38ec83f..178529f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {    case G_FMINIMUMNUM:    case G_FMAXIMUMNUM:      
return lowerFMinNumMaxNum(MI); +  case G_FMINIMUM: +  case G_FMAXIMUM: +    return lowerFMinimumMaximum(MI);    case G_MERGE_VALUES:      return lowerMergeValues(MI);    case G_UNMERGE_VALUES: @@ -5819,6 +5822,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(      } else if (InputUsed[0] == -1U) {        // No input vectors were used! The result is undefined.        Output = MIRBuilder.buildUndef(NarrowTy).getReg(0); +    } else if (NewElts == 1) { +      Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);      } else {        Register Op0 = Inputs[InputUsed[0]];        // If only one input was used, use an undefined vector for the other. @@ -8775,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {    return Legalized;  } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) { +  unsigned Opc = MI.getOpcode(); +  auto [Dst, Src0, Src1] = MI.getFirst3Regs(); +  LLT Ty = MRI.getType(Dst); +  LLT CmpTy = Ty.changeElementSize(1); + +  bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM); +  unsigned OpcIeee = +      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE; +  unsigned OpcNonIeee = +      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM; +  bool MinMaxMustRespectOrderedZero = false; +  Register Res; + +  // IEEE variants don't need canonicalization +  if (LI.isLegalOrCustom({OpcIeee, Ty})) { +    Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0); +    MinMaxMustRespectOrderedZero = true; +  } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) { +    Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0); +  } else { +    auto Compare = MIRBuilder.buildFCmp( +        IsMax ? 
CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1); +    Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0); +  } + +  // Propagate any NaN of both operands +  if (!MI.getFlag(MachineInstr::FmNoNans) && +      (!isKnownNeverNaN(Src0, MRI) || isKnownNeverNaN(Src1, MRI))) { +    auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1); + +    LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType(); +    APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy)); +    Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0); +    if (Ty.isVector()) +      NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0); + +    Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0); +  } + +  // fminimum/fmaximum requires -0.0 less than +0.0 +  if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) { +    GISelValueTracking VT(MIRBuilder.getMF()); +    KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero); +    KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero); + +    if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) { +      const unsigned Flags = MI.getFlags(); +      Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0); +      auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero); + +      unsigned TestClass = IsMax ? 
fcPosZero : fcNegZero; + +      auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass); +      auto LHSSelect = +          MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags); + +      auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass); +      auto RHSSelect = +          MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags); + +      Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0); +    } +  } + +  MIRBuilder.buildCopy(Dst, Res); +  MI.eraseFromParent(); +  return Legalized; +} +  LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {    // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c    Register DstReg = MI.getOperand(0).getReg(); @@ -9016,22 +9092,18 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {        continue;      } -    if (Src0Ty.isScalar()) { -      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); -    } else { -      int NumElts = Src0Ty.getNumElements(); -      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; -      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; -      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); -      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); -      BuildVec.push_back(Extract.getReg(0)); -    } +    assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR"); + +    int NumElts = Src0Ty.getNumElements(); +    Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; +    int ExtractIdx = Idx < NumElts ? 
Idx : Idx - NumElts; +    auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); +    auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); +    BuildVec.push_back(Extract.getReg(0));    } -  if (DstTy.isVector()) -    MIRBuilder.buildBuildVector(DstReg, BuildVec); -  else -    MIRBuilder.buildCopy(DstReg, BuildVec[0]); +  assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR"); +  MIRBuilder.buildBuildVector(DstReg, BuildVec);    MI.eraseFromParent();    return Legalized;  } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 27df7e3..4b4df98 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -800,10 +800,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,    LLT DstTy = Res.getLLTTy(*getMRI());    LLT Src1Ty = Src1.getLLTTy(*getMRI());    LLT Src2Ty = Src2.getLLTTy(*getMRI()); -  const LLT DstElemTy = DstTy.isVector() ? DstTy.getElementType() : DstTy; -  const LLT ElemTy1 = Src1Ty.isVector() ? Src1Ty.getElementType() : Src1Ty; -  const LLT ElemTy2 = Src2Ty.isVector() ? 
Src2Ty.getElementType() : Src2Ty; +  const LLT DstElemTy = DstTy.getScalarType(); +  const LLT ElemTy1 = Src1Ty.getScalarType(); +  const LLT ElemTy2 = Src2Ty.getScalarType();    assert(DstElemTy == ElemTy1 && DstElemTy == ElemTy2); +  assert(Mask.size() > 1 && "Scalar G_SHUFFLE_VECTOR are not supported");    (void)DstElemTy;    (void)ElemTy1;    (void)ElemTy2; diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6a464d9..4795d81 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -2788,6 +2788,9 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {    if (expectAndConsume(MIToken::rparen))      return error("shufflemask should be terminated by ')'."); +  if (ShufMask.size() < 2) +    return error("shufflemask should have > 1 element"); +    ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask);    Dest = MachineOperand::CreateShuffleMask(MaskAlloc);    return false; diff --git a/llvm/lib/CodeGen/MachineBlockHashInfo.cpp b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp new file mode 100644 index 0000000..c4d9c0f --- /dev/null +++ b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp @@ -0,0 +1,115 @@ +//===- llvm/CodeGen/MachineBlockHashInfo.cpp---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Compute the hashes of basic blocks. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBlockHashInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +uint64_t hashBlock(const MachineBasicBlock &MBB, bool HashOperands) { +  uint64_t Hash = 0; +  for (const MachineInstr &MI : MBB) { +    if (MI.isMetaInstruction() || MI.isTerminator()) +      continue; +    Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode()); +    if (HashOperands) { +      for (unsigned i = 0; i < MI.getNumOperands(); i++) { +        Hash = +            hashing::detail::hash_16_bytes(Hash, hash_value(MI.getOperand(i))); +      } +    } +  } +  return Hash; +} + +/// Fold a 64-bit integer to a 16-bit one. +uint16_t fold_64_to_16(const uint64_t Value) { +  uint16_t Res = static_cast<uint16_t>(Value); +  Res ^= static_cast<uint16_t>(Value >> 16); +  Res ^= static_cast<uint16_t>(Value >> 32); +  Res ^= static_cast<uint16_t>(Value >> 48); +  return Res; +} + +INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash", +                "Machine Block Hash Analysis", true, true) + +char MachineBlockHashInfo::ID = 0; + +MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) { +  initializeMachineBlockHashInfoPass(*PassRegistry::getPassRegistry()); +} + +void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  MachineFunctionPass::getAnalysisUsage(AU); +} + +struct CollectHashInfo { +  uint64_t Offset; +  uint64_t OpcodeHash; +  uint64_t InstrHash; +  uint64_t NeighborHash; +}; + +bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) { +  DenseMap<const MachineBasicBlock *, CollectHashInfo> HashInfos; +  uint16_t Offset = 0; +  // Initialize hash components +  for (const MachineBasicBlock &MBB : F) { +    // offset of the machine basic block +    HashInfos[&MBB].Offset = Offset; +    Offset += MBB.size(); +  
  // Hashing opcodes +    HashInfos[&MBB].OpcodeHash = hashBlock(MBB, /*HashOperands=*/false); +    // Hash complete instructions +    HashInfos[&MBB].InstrHash = hashBlock(MBB, /*HashOperands=*/true); +  } + +  // Initialize neighbor hash +  for (const MachineBasicBlock &MBB : F) { +    uint64_t Hash = HashInfos[&MBB].OpcodeHash; +    // Append hashes of successors +    for (const MachineBasicBlock *SuccMBB : MBB.successors()) { +      uint64_t SuccHash = HashInfos[SuccMBB].OpcodeHash; +      Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); +    } +    // Append hashes of predecessors +    for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { +      uint64_t PredHash = HashInfos[PredMBB].OpcodeHash; +      Hash = hashing::detail::hash_16_bytes(Hash, PredHash); +    } +    HashInfos[&MBB].NeighborHash = Hash; +  } + +  // Assign hashes +  for (const MachineBasicBlock &MBB : F) { +    const auto &HashInfo = HashInfos[&MBB]; +    BlendedBlockHash BlendedHash(fold_64_to_16(HashInfo.Offset), +                                 fold_64_to_16(HashInfo.OpcodeHash), +                                 fold_64_to_16(HashInfo.InstrHash), +                                 fold_64_to_16(HashInfo.NeighborHash)); +    MBBHashInfo[&MBB] = BlendedHash.combine(); +  } + +  return false; +} + +uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) { +  return MBBHashInfo[&MBB]; +} + +MachineFunctionPass *llvm::createMachineBlockHashInfoPass() { +  return new MachineBlockHashInfo(); +} diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1154855..c0710c4 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1924,13 +1924,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {      if (Src0Ty != Src1Ty)        report("Source operands must be the same type", MI); -    if (Src0Ty.getScalarType() != DstTy.getScalarType()) +    if 
(Src0Ty.getScalarType() != DstTy.getScalarType()) {        report("G_SHUFFLE_VECTOR cannot change element type", MI); +      break; +    } +    if (!Src0Ty.isVector()) { +      report("G_SHUFFLE_VECTOR must have vector src", MI); +      break; +    } +    if (!DstTy.isVector()) { +      report("G_SHUFFLE_VECTOR must have vector dst", MI); +      break; +    }      // Don't check that all operands are vector because scalars are used in      // place of 1 element vectors. -    int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; -    int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1; +    int SrcNumElts = Src0Ty.getNumElements(); +    int DstNumElts = DstTy.getNumElements();      ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask(); diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index f54e2f2..620d3d3 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -593,7 +593,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {      case Intrinsic::log:        Changed |= forEachCall(F, [&](CallInst *CI) {          Type *Ty = CI->getArgOperand(0)->getType(); -        if (!isa<ScalableVectorType>(Ty)) +        if (!TM || !isa<ScalableVectorType>(Ty))            return false;          const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering();          unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID()); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 72b364c..697b779 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -211,7 +211,7 @@ private:      unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); }    }; -  using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>; +  using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>;    /// This map contains entries for each virtual register 
that is currently    /// available in a physical register.    LiveRegMap LiveVirtRegs; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d2ea652..8676060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19993,8 +19993,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,    //    nor a successor of N. Otherwise, if Op is folded that would    //    create a cycle.    unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); -  for (SDNode *Op : Ptr->users()) { +  for (SDUse &U : Ptr->uses()) { +    if (U.getResNo() != Ptr.getResNo()) +      continue; +      // Check for #1. +    SDNode *Op = U.getUser();      if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))        continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 603dc34..9656a30 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -890,6 +890,7 @@ private:    SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);    SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);    SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); +  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);    SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);    SDValue ScalarizeVecRes_VSELECT(SDNode *N);    SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3b5f83f..bb4a8d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -69,6 +69,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {      R = ScalarizeVecRes_UnaryOpWithExtraInput(N);      break;    case ISD::INSERT_VECTOR_ELT: R = 
ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; +  case ISD::ATOMIC_LOAD: +    R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); +    break;    case ISD::LOAD:           R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;    case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;    case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -475,6 +478,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {    return Op;  } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { +  SDValue Result = DAG.getAtomicLoad( +      N->getExtensionType(), SDLoc(N), N->getMemoryVT().getVectorElementType(), +      N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), +      N->getMemOperand()); + +  // Legalize the chain result - switch anything that used the old chain to +  // use the new one. +  ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); +  return Result; +} +  SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {    assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 90edaf3..379242e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8620,7 +8620,10 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,    if (TLI->shouldPreservePtrArith(this->getMachineFunction().getFunction(),                                    BasePtrVT))      return getNode(ISD::PTRADD, DL, BasePtrVT, Ptr, Offset, Flags); -  return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); +  // InBounds only applies to PTRADD, don't set it if we generate ADD. +  SDNodeFlags AddFlags = Flags; +  AddFlags.setInBounds(false); +  return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, AddFlags);  }  /// Returns true if memcpy source is constant data. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d57c5fb..dee0909 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1162,6 +1162,43 @@ SDValue SelectionDAGBuilder::getMemoryRoot() {    return updateRoot(PendingLoads);  } +SDValue SelectionDAGBuilder::getFPOperationRoot(fp::ExceptionBehavior EB) { +  // If the new exception behavior differs from that of the pending +  // ones, chain them up and update the root. +  switch (EB) { +  case fp::ExceptionBehavior::ebMayTrap: +  case fp::ExceptionBehavior::ebIgnore: +    // Floating-point exceptions produced by such operations are not intended +    // to be observed, so the sequence of these operations does not need to be +    // preserved. +    // +    // They however must not be mixed with the instructions that have strict +    // exception behavior. Placing an operation with 'ebIgnore' behavior between +    // 'ebStrict' operations could distort the observed exception behavior. +    if (!PendingConstrainedFPStrict.empty()) { +      assert(PendingConstrainedFP.empty()); +      updateRoot(PendingConstrainedFPStrict); +    } +    break; +  case fp::ExceptionBehavior::ebStrict: +    // Floating-point exceptions produced by these operations may be observed, so +    // they must be correctly chained. If trapping on FP exceptions is +    // disabled, the exceptions can be observed only by functions that read +    // exception flags, like 'llvm.get_fpenv' or 'fetestexcept'. It means that +    // the order of operations is not significant between barriers. +    // +    // If trapping is enabled, each operation becomes an implicit observation +    // point, so the operations must be sequenced according to their original +    // source order. 
+    if (!PendingConstrainedFP.empty()) { +      assert(PendingConstrainedFPStrict.empty()); +      updateRoot(PendingConstrainedFP); +    } +    // TODO: Add support for trapping-enabled scenarios. +  } +  return DAG.getRoot(); +} +  SDValue SelectionDAGBuilder::getRoot() {    // Chain up all pending constrained intrinsics together with all    // pending loads, by simply appending them to PendingLoads and @@ -4390,6 +4427,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {          if (NW.hasNoUnsignedWrap() ||              (int64_t(Offset) >= 0 && NW.hasNoUnsignedSignedWrap()))            Flags |= SDNodeFlags::NoUnsignedWrap; +        Flags.setInBounds(NW.isInBounds());          N = DAG.getMemBasePlusOffset(              N, DAG.getConstant(Offset, dl, N.getValueType()), dl, Flags); @@ -4433,6 +4471,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {          if (NW.hasNoUnsignedWrap() ||              (Offs.isNonNegative() && NW.hasNoUnsignedSignedWrap()))            Flags.setNoUnsignedWrap(true); +        Flags.setInBounds(NW.isInBounds());          OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); @@ -4502,6 +4541,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {        // pointer index type (add nuw).        
SDNodeFlags AddFlags;        AddFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap()); +      AddFlags.setInBounds(NW.isInBounds());        N = DAG.getMemBasePlusOffset(N, IdxN, dl, AddFlags);      } @@ -8295,6 +8335,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,    }  } +void SelectionDAGBuilder::pushFPOpOutChain(SDValue Result, +                                           fp::ExceptionBehavior EB) { +  assert(Result.getNode()->getNumValues() == 2); +  SDValue OutChain = Result.getValue(1); +  assert(OutChain.getValueType() == MVT::Other); + +  // Instead of updating the root immediately, push the produced chain to the +  // appropriate list, deferring the update until the root is requested. In this +  // case, the nodes from the lists are chained using TokenFactor, indicating +  // that the operations are independent. +  // +  // In particular, the root is updated before any call that might access the +  // floating-point environment, except for constrained intrinsics. +  switch (EB) { +  case fp::ExceptionBehavior::ebMayTrap: +  case fp::ExceptionBehavior::ebIgnore: +    PendingConstrainedFP.push_back(OutChain); +    break; +  case fp::ExceptionBehavior::ebStrict: +    PendingConstrainedFPStrict.push_back(OutChain); +    break; +  } +} +  void SelectionDAGBuilder::visitConstrainedFPIntrinsic(      const ConstrainedFPIntrinsic &FPI) {    SDLoc sdl = getCurSDLoc(); @@ -8302,42 +8366,16 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(    // We do not need to serialize constrained FP intrinsics against    // each other or against (nonvolatile) loads, so they can be    // chained like loads. 
-  SDValue Chain = DAG.getRoot(); +  fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); +  SDValue Chain = getFPOperationRoot(EB);    SmallVector<SDValue, 4> Opers;    Opers.push_back(Chain);    for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I)      Opers.push_back(getValue(FPI.getArgOperand(I))); -  auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { -    assert(Result.getNode()->getNumValues() == 2); - -    // Push node to the appropriate list so that future instructions can be -    // chained up correctly. -    SDValue OutChain = Result.getValue(1); -    switch (EB) { -    case fp::ExceptionBehavior::ebIgnore: -      // The only reason why ebIgnore nodes still need to be chained is that -      // they might depend on the current rounding mode, and therefore must -      // not be moved across instruction that may change that mode. -      [[fallthrough]]; -    case fp::ExceptionBehavior::ebMayTrap: -      // These must not be moved across calls or instructions that may change -      // floating-point exception masks. -      PendingConstrainedFP.push_back(OutChain); -      break; -    case fp::ExceptionBehavior::ebStrict: -      // These must not be moved across calls or instructions that may change -      // floating-point exception masks or read floating-point exception flags. -      // In addition, they cannot be optimized out even if unused. 
-      PendingConstrainedFPStrict.push_back(OutChain); -      break; -    } -  }; -    const TargetLowering &TLI = DAG.getTargetLoweringInfo();    EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());    SDVTList VTs = DAG.getVTList(VT, MVT::Other); -  fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();    SDNodeFlags Flags;    if (EB == fp::ExceptionBehavior::ebIgnore) @@ -8361,7 +8399,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(          !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {        Opers.pop_back();        SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); -      pushOutChain(Mul, EB); +      pushFPOpOutChain(Mul, EB);        Opcode = ISD::STRICT_FADD;        Opers.clear();        Opers.push_back(Mul.getValue(1)); @@ -8392,7 +8430,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(    }    SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); -  pushOutChain(Result, EB); +  pushFPOpOutChain(Result, EB);    SDValue FPResult = Result.getValue(0);    setValue(&FPI, FPResult); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index c7577fa..47e19f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -195,6 +195,11 @@ private:    /// Update root to include all chains from the Pending list.    SDValue updateRoot(SmallVectorImpl<SDValue> &Pending); +  /// Given a node representing a floating-point operation and its specified +  /// exception behavior, this either updates the root or stores the node in +  /// a list to be added to chains latter. +  void pushFPOpOutChain(SDValue Result, fp::ExceptionBehavior EB); +    /// A unique monotonically increasing number used to order the SDNodes we    /// create.    unsigned SDNodeOrder; @@ -300,6 +305,13 @@ public:    /// memory node that may need to be ordered after any prior load instructions.    
SDValue getMemoryRoot(); +  /// Return the current virtual root of the Selection DAG, flushing +  /// PendingConstrainedFP or PendingConstrainedFPStrict items if the new +  /// exception behavior (specified by \p EB) differs from that of the pending +  /// instructions. This must be done before emitting constrained FP operation +  /// call. +  SDValue getFPOperationRoot(fp::ExceptionBehavior EB); +    /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict)    /// items. This must be done before emitting any call other any other node    /// that may need to be ordered after FP instructions due to other side diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 39cbfad..77377d3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -689,6 +689,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {    if (getFlags().hasSameSign())      OS << " samesign"; +  if (getFlags().hasInBounds()) +    OS << " inbounds"; +    if (getFlags().hasNonNeg())      OS << " nneg"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 060b1dd..59798b3 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -2097,6 +2097,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {  }  Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { +  // MSVC CRT has a function to validate security cookie. 
+  RTLIB::LibcallImpl SecurityCheckCookieLibcall = +      getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); +  if (SecurityCheckCookieLibcall != RTLIB::Unsupported) +    return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall));    return nullptr;  } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index b6169e6..10b7238 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -272,6 +272,12 @@ static cl::opt<bool>                      cl::desc("Split static data sections into hot and cold "                               "sections using profile information")); +cl::opt<bool> EmitBBHash( +    "emit-bb-hash", +    cl::desc( +        "Emit the hash of basic block in the SHT_LLVM_BB_ADDR_MAP section."), +    cl::init(false), cl::Optional); +  /// Allow standard passes to be disabled by command line options. This supports  /// simple binary flags that either suppress the pass or do nothing.  /// i.e. -disable-mypass=false has no effect. @@ -1281,6 +1287,8 @@ void TargetPassConfig::addMachinePasses() {    // address map (or both).    if (TM->getBBSectionsType() != llvm::BasicBlockSection::None ||        TM->Options.BBAddrMap) { +    if (EmitBBHash) +      addPass(llvm::createMachineBlockHashInfoPass());      if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {        addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(            TM->getBBSectionsFuncListBuf())); | 
