diff options
Diffstat (limited to 'llvm/lib/CodeGen')
22 files changed, 417 insertions, 115 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fefde64f..8aa488f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -41,6 +41,7 @@ #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockHashInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" @@ -184,6 +185,8 @@ static cl::opt<bool> PrintLatency( cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden, cl::init(false)); +extern cl::opt<bool> EmitBBHash; + STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; @@ -474,6 +477,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<GCModuleInfo>(); AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); + if (EmitBBHash) + AU.addRequired<MachineBlockHashInfo>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -1434,14 +1439,11 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, "BB entries info is required for BBFreq and BrProb " "features"); } - return {FuncEntryCountEnabled, - BBFreqEnabled, - BrProbEnabled, + return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 1, // Use static_cast to avoid breakage of tests on windows. - static_cast<bool>(BBAddrMapSkipEmitBBEntries), - HasCalls, - false}; + static_cast<bool>(BBAddrMapSkipEmitBBEntries), HasCalls, + static_cast<bool>(EmitBBHash)}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { @@ -1500,6 +1502,9 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { PrevMBBEndSymbol = MBBSymbol; } + auto MBHI = + Features.BBHash ? 
&getAnalysis<MachineBlockHashInfo>() : nullptr; + if (!Features.OmitBBEntries) { OutStreamer->AddComment("BB id"); // Emit the BB ID for this basic block. @@ -1527,6 +1532,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), CurrentLabel); // Emit the Metadata. OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB)); + // Emit the Hash. + if (MBHI) { + OutStreamer->emitInt64(MBHI->getMBBHash(MBB)); + } } PrevMBBEndSymbol = MBB.getEndSymbol(); } diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index b6872605..4373c53 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen LowerEmuTLS.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp + MachineBlockHashInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp MachineCFGPrinter.cpp diff --git a/llvm/lib/CodeGen/ExpandFp.cpp b/llvm/lib/CodeGen/ExpandFp.cpp index 2b5ced3..f44eb22 100644 --- a/llvm/lib/CodeGen/ExpandFp.cpp +++ b/llvm/lib/CodeGen/ExpandFp.cpp @@ -1108,8 +1108,8 @@ public: }; } // namespace -ExpandFpPass::ExpandFpPass(const TargetMachine *TM, CodeGenOptLevel OptLevel) - : TM(TM), OptLevel(OptLevel) {} +ExpandFpPass::ExpandFpPass(const TargetMachine &TM, CodeGenOptLevel OptLevel) + : TM(&TM), OptLevel(OptLevel) {} void ExpandFpPass::printPipeline( raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b425b95..1f10478 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -391,19 +391,6 @@ void CombinerHelper::applyCombineConcatVectors( MI.eraseFromParent(); } -bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Invalid 
instruction"); - auto &Shuffle = cast<GShuffleVector>(MI); - - Register SrcVec1 = Shuffle.getSrc1Reg(); - Register SrcVec2 = Shuffle.getSrc2Reg(); - - LLT SrcVec1Type = MRI.getType(SrcVec1); - LLT SrcVec2Type = MRI.getType(SrcVec2); - return SrcVec1Type.isVector() && SrcVec2Type.isVector(); -} - void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const { auto &Shuffle = cast<GShuffleVector>(MI); @@ -535,11 +522,9 @@ bool CombinerHelper::matchCombineShuffleVector( LLT DstType = MRI.getType(MI.getOperand(0).getReg()); Register Src1 = MI.getOperand(1).getReg(); LLT SrcType = MRI.getType(Src1); - // As bizarre as it may look, shuffle vector can actually produce - // scalar! This is because at the IR level a <1 x ty> shuffle - // vector is perfectly valid. - unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; - unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; + + unsigned DstNumElts = DstType.getNumElements(); + unsigned SrcNumElts = SrcType.getNumElements(); // If the resulting vector is smaller than the size of the source // vectors being concatenated, we won't be able to replace the @@ -556,7 +541,7 @@ bool CombinerHelper::matchCombineShuffleVector( // // TODO: If the size between the source and destination don't match // we could still emit an extract vector element in that case. 
- if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) + if (DstNumElts < 2 * SrcNumElts) return false; // Check that the shuffle mask can be broken evenly between the @@ -619,39 +604,6 @@ void CombinerHelper::applyCombineShuffleVector( MI.eraseFromParent(); } -bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) const { - assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && - "Invalid instruction kind"); - - ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - return Mask.size() == 1; -} - -void CombinerHelper::applyShuffleToExtract(MachineInstr &MI) const { - Register DstReg = MI.getOperand(0).getReg(); - Builder.setInsertPt(*MI.getParent(), MI); - - int I = MI.getOperand(3).getShuffleMask()[0]; - Register Src1 = MI.getOperand(1).getReg(); - LLT Src1Ty = MRI.getType(Src1); - int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1; - Register SrcReg; - if (I >= Src1NumElts) { - SrcReg = MI.getOperand(2).getReg(); - I -= Src1NumElts; - } else if (I >= 0) - SrcReg = Src1; - - if (I < 0) - Builder.buildUndef(DstReg); - else if (!MRI.getType(SrcReg).isVector()) - Builder.buildCopy(DstReg, SrcReg); - else - Builder.buildExtractVectorElementConstant(DstReg, SrcReg, I); - - MI.eraseFromParent(); -} - namespace { /// Select a preference between two uses. CurrentUse is the current preference @@ -8369,7 +8321,7 @@ bool CombinerHelper::matchShuffleDisjointMask(MachineInstr &MI, return false; ArrayRef<int> Mask = Shuffle.getMask(); - const unsigned NumSrcElems = Src1Ty.isVector() ? 
Src1Ty.getNumElements() : 1; + const unsigned NumSrcElems = Src1Ty.getNumElements(); bool TouchesSrc1 = false; bool TouchesSrc2 = false; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp index 04d9309..d6f23b6 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp @@ -602,6 +602,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, Depth + 1); computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, Depth + 1); + OffsetKnown = OffsetKnown.sext(BitWidth); + WidthKnown = WidthKnown.sext(BitWidth); Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); // Sign extend the extracted value using shift left and arithmetic shift // right. diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b49040b..1fc90d0 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3359,6 +3359,54 @@ bool IRTranslator::translateShuffleVector(const User &U, Mask = SVI->getShuffleMask(); else Mask = cast<ConstantExpr>(U).getShuffleMask(); + + // As GISel does not represent <1 x > vectors as a separate type from scalars, + // we transform shuffle_vector with a scalar output to an + // ExtractVectorElement. If the input type is also scalar it becomes a Copy. 
+ unsigned DstElts = cast<FixedVectorType>(U.getType())->getNumElements(); + unsigned SrcElts = + cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements(); + if (DstElts == 1) { + unsigned M = Mask[0]; + if (SrcElts == 1) { + if (M == 0 || M == 1) + return translateCopy(U, *U.getOperand(M), MIRBuilder); + MIRBuilder.buildUndef(getOrCreateVReg(U)); + } else { + Register Dst = getOrCreateVReg(U); + if (M < SrcElts) { + MIRBuilder.buildExtractVectorElementConstant( + Dst, getOrCreateVReg(*U.getOperand(0)), M); + } else if (M < SrcElts * 2) { + MIRBuilder.buildExtractVectorElementConstant( + Dst, getOrCreateVReg(*U.getOperand(1)), M - SrcElts); + } else { + MIRBuilder.buildUndef(Dst); + } + } + return true; + } + + // A single element src is transformed to a build_vector. + if (SrcElts == 1) { + SmallVector<Register> Ops; + Register Undef; + for (int M : Mask) { + LLT SrcTy = getLLTForType(*U.getOperand(0)->getType(), *DL); + if (M == 0 || M == 1) { + Ops.push_back(getOrCreateVReg(*U.getOperand(M))); + } else { + if (!Undef.isValid()) { + Undef = MRI->createGenericVirtualRegister(SrcTy); + MIRBuilder.buildUndef(Undef); + } + Ops.push_back(Undef); + } + } + MIRBuilder.buildBuildVector(getOrCreateVReg(U), Ops); + return true; + } + ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask); MIRBuilder .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)}, diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 38ec83f..178529f 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { case G_FMINIMUMNUM: case G_FMAXIMUMNUM: return lowerFMinNumMaxNum(MI); + case G_FMINIMUM: + case G_FMAXIMUM: + return lowerFMinimumMaximum(MI); case G_MERGE_VALUES: return lowerMergeValues(MI); case G_UNMERGE_VALUES: @@ -5819,6 +5822,8 @@ 
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. Output = MIRBuilder.buildUndef(NarrowTy).getReg(0); + } else if (NewElts == 1) { + Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0); } else { Register Op0 = Inputs[InputUsed[0]]; // If only one input was used, use an undefined vector for the other. @@ -8775,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + auto [Dst, Src0, Src1] = MI.getFirst3Regs(); + LLT Ty = MRI.getType(Dst); + LLT CmpTy = Ty.changeElementSize(1); + + bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM); + unsigned OpcIeee = + IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE; + unsigned OpcNonIeee = + IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM; + bool MinMaxMustRespectOrderedZero = false; + Register Res; + + // IEEE variants don't need canonicalization + if (LI.isLegalOrCustom({OpcIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0); + MinMaxMustRespectOrderedZero = true; + } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) { + Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0); + } else { + auto Compare = MIRBuilder.buildFCmp( + IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1); + Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0); + } + + // Propagate any NaN of both operands + if (!MI.getFlag(MachineInstr::FmNoNans) && + (!isKnownNeverNaN(Src0, MRI) || isKnownNeverNaN(Src1, MRI))) { + auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1); + + LLT ElementTy = Ty.isScalar() ? 
Ty : Ty.getElementType(); + APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy)); + Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0); + if (Ty.isVector()) + NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0); + + Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0); + } + + // fminimum/fmaximum requires -0.0 less than +0.0 + if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) { + GISelValueTracking VT(MIRBuilder.getMF()); + KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero); + KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero); + + if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) { + const unsigned Flags = MI.getFlags(); + Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0); + auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero); + + unsigned TestClass = IsMax ? fcPosZero : fcNegZero; + + auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass); + auto LHSSelect = + MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags); + + auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass); + auto RHSSelect = + MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags); + + Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0); + } + } + + MIRBuilder.buildCopy(Dst, Res); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c Register DstReg = MI.getOperand(0).getReg(); @@ -9016,22 +9092,18 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { continue; } - if (Src0Ty.isScalar()) { - BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); - } else { - int NumElts = Src0Ty.getNumElements(); - Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; - int ExtractIdx = Idx < NumElts ? 
Idx : Idx - NumElts; - auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); - auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); - BuildVec.push_back(Extract.getReg(0)); - } + assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR"); + + int NumElts = Src0Ty.getNumElements(); + Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg; + int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); + BuildVec.push_back(Extract.getReg(0)); } - if (DstTy.isVector()) - MIRBuilder.buildBuildVector(DstReg, BuildVec); - else - MIRBuilder.buildCopy(DstReg, BuildVec[0]); + assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR"); + MIRBuilder.buildBuildVector(DstReg, BuildVec); MI.eraseFromParent(); return Legalized; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 27df7e3..4b4df98 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -800,10 +800,11 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res, LLT DstTy = Res.getLLTTy(*getMRI()); LLT Src1Ty = Src1.getLLTTy(*getMRI()); LLT Src2Ty = Src2.getLLTTy(*getMRI()); - const LLT DstElemTy = DstTy.isVector() ? DstTy.getElementType() : DstTy; - const LLT ElemTy1 = Src1Ty.isVector() ? Src1Ty.getElementType() : Src1Ty; - const LLT ElemTy2 = Src2Ty.isVector() ? 
Src2Ty.getElementType() : Src2Ty; + const LLT DstElemTy = DstTy.getScalarType(); + const LLT ElemTy1 = Src1Ty.getScalarType(); + const LLT ElemTy2 = Src2Ty.getScalarType(); assert(DstElemTy == ElemTy1 && DstElemTy == ElemTy2); + assert(Mask.size() > 1 && "Scalar G_SHUFFLE_VECTOR are not supported"); (void)DstElemTy; (void)ElemTy1; (void)ElemTy2; diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6a464d9..4795d81 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -2788,6 +2788,9 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { if (expectAndConsume(MIToken::rparen)) return error("shufflemask should be terminated by ')'."); + if (ShufMask.size() < 2) + return error("shufflemask should have > 1 element"); + ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask); Dest = MachineOperand::CreateShuffleMask(MaskAlloc); return false; diff --git a/llvm/lib/CodeGen/MachineBlockHashInfo.cpp b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp new file mode 100644 index 0000000..c4d9c0f --- /dev/null +++ b/llvm/lib/CodeGen/MachineBlockHashInfo.cpp @@ -0,0 +1,115 @@ +//===- llvm/CodeGen/MachineBlockHashInfo.cpp---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Compute the hashes of basic blocks. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBlockHashInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +uint64_t hashBlock(const MachineBasicBlock &MBB, bool HashOperands) { + uint64_t Hash = 0; + for (const MachineInstr &MI : MBB) { + if (MI.isMetaInstruction() || MI.isTerminator()) + continue; + Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode()); + if (HashOperands) { + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + Hash = + hashing::detail::hash_16_bytes(Hash, hash_value(MI.getOperand(i))); + } + } + } + return Hash; +} + +/// Fold a 64-bit integer to a 16-bit one. +uint16_t fold_64_to_16(const uint64_t Value) { + uint16_t Res = static_cast<uint16_t>(Value); + Res ^= static_cast<uint16_t>(Value >> 16); + Res ^= static_cast<uint16_t>(Value >> 32); + Res ^= static_cast<uint16_t>(Value >> 48); + return Res; +} + +INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash", + "Machine Block Hash Analysis", true, true) + +char MachineBlockHashInfo::ID = 0; + +MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) { + initializeMachineBlockHashInfoPass(*PassRegistry::getPassRegistry()); +} + +void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +struct CollectHashInfo { + uint64_t Offset; + uint64_t OpcodeHash; + uint64_t InstrHash; + uint64_t NeighborHash; +}; + +bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) { + DenseMap<const MachineBasicBlock *, CollectHashInfo> HashInfos; + uint16_t Offset = 0; + // Initialize hash components + for (const MachineBasicBlock &MBB : F) { + // offset of the machine basic block + HashInfos[&MBB].Offset = Offset; + Offset += MBB.size(); + // Hashing opcodes + HashInfos[&MBB].OpcodeHash = hashBlock(MBB, /*HashOperands=*/false); 
+ // Hash complete instructions + HashInfos[&MBB].InstrHash = hashBlock(MBB, /*HashOperands=*/true); + } + + // Initialize neighbor hash + for (const MachineBasicBlock &MBB : F) { + uint64_t Hash = HashInfos[&MBB].OpcodeHash; + // Append hashes of successors + for (const MachineBasicBlock *SuccMBB : MBB.successors()) { + uint64_t SuccHash = HashInfos[SuccMBB].OpcodeHash; + Hash = hashing::detail::hash_16_bytes(Hash, SuccHash); + } + // Append hashes of predecessors + for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { + uint64_t PredHash = HashInfos[PredMBB].OpcodeHash; + Hash = hashing::detail::hash_16_bytes(Hash, PredHash); + } + HashInfos[&MBB].NeighborHash = Hash; + } + + // Assign hashes + for (const MachineBasicBlock &MBB : F) { + const auto &HashInfo = HashInfos[&MBB]; + BlendedBlockHash BlendedHash(fold_64_to_16(HashInfo.Offset), + fold_64_to_16(HashInfo.OpcodeHash), + fold_64_to_16(HashInfo.InstrHash), + fold_64_to_16(HashInfo.NeighborHash)); + MBBHashInfo[&MBB] = BlendedHash.combine(); + } + + return false; +} + +uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) { + return MBBHashInfo[&MBB]; +} + +MachineFunctionPass *llvm::createMachineBlockHashInfoPass() { + return new MachineBlockHashInfo(); +} diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1154855..c0710c4 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1924,13 +1924,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (Src0Ty != Src1Ty) report("Source operands must be the same type", MI); - if (Src0Ty.getScalarType() != DstTy.getScalarType()) + if (Src0Ty.getScalarType() != DstTy.getScalarType()) { report("G_SHUFFLE_VECTOR cannot change element type", MI); + break; + } + if (!Src0Ty.isVector()) { + report("G_SHUFFLE_VECTOR must have vector src", MI); + break; + } + if (!DstTy.isVector()) { + report("G_SHUFFLE_VECTOR must have vector dst", 
MI); + break; + } // Don't check that all operands are vector because scalars are used in // place of 1 element vectors. - int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; - int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1; + int SrcNumElts = Src0Ty.getNumElements(); + int DstNumElts = DstTy.getNumElements(); ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask(); diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index f54e2f2..620d3d3 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -593,7 +593,7 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const { case Intrinsic::log: Changed |= forEachCall(F, [&](CallInst *CI) { Type *Ty = CI->getArgOperand(0)->getType(); - if (!isa<ScalableVectorType>(Ty)) + if (!TM || !isa<ScalableVectorType>(Ty)) return false; const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering(); unsigned Op = TL->IntrinsicIDToISD(F.getIntrinsicID()); diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 72b364c..697b779 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -211,7 +211,7 @@ private: unsigned getSparseSetIndex() const { return VirtReg.virtRegIndex(); } }; - using LiveRegMap = SparseSet<LiveReg, unsigned, identity_cxx20, uint16_t>; + using LiveRegMap = SparseSet<LiveReg, unsigned, identity, uint16_t>; /// This map contains entries for each virtual register that is currently /// available in a physical register. LiveRegMap LiveVirtRegs; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d2ea652..8676060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19993,8 +19993,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, // nor a successor of N. 
Otherwise, if Op is folded that would // create a cycle. unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps(); - for (SDNode *Op : Ptr->users()) { + for (SDUse &U : Ptr->uses()) { + if (U.getResNo() != Ptr.getResNo()) + continue; + // Check for #1. + SDNode *Op = U.getUser(); if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI)) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 603dc34..9656a30 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -890,6 +890,7 @@ private: SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3b5f83f..bb4a8d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -69,6 +69,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: + R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); + break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -475,6 +478,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = 
DAG.getAtomicLoad( + N->getExtensionType(), SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 90edaf3..379242e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8620,7 +8620,10 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, if (TLI->shouldPreservePtrArith(this->getMachineFunction().getFunction(), BasePtrVT)) return getNode(ISD::PTRADD, DL, BasePtrVT, Ptr, Offset, Flags); - return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); + // InBounds only applies to PTRADD, don't set it if we generate ADD. + SDNodeFlags AddFlags = Flags; + AddFlags.setInBounds(false); + return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, AddFlags); } /// Returns true if memcpy source is constant data. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d57c5fb..dee0909 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1162,6 +1162,43 @@ SDValue SelectionDAGBuilder::getMemoryRoot() { return updateRoot(PendingLoads); } +SDValue SelectionDAGBuilder::getFPOperationRoot(fp::ExceptionBehavior EB) { + // If the new exception behavior differs from that of the pending + // ones, chain up them and update the root. 
+ switch (EB) { + case fp::ExceptionBehavior::ebMayTrap: + case fp::ExceptionBehavior::ebIgnore: + // Floating-point exceptions produced by such operations are not intended + // to be observed, so the sequence of these operations does not need to be + // preserved. + // + // They however must not be mixed with the instructions that have strict + // exception behavior. Placing an operation with 'ebIgnore' behavior between + // 'ebStrict' operations could distort the observed exception behavior. + if (!PendingConstrainedFPStrict.empty()) { + assert(PendingConstrainedFP.empty()); + updateRoot(PendingConstrainedFPStrict); + } + break; + case fp::ExceptionBehavior::ebStrict: + // Floating-point exception produced by these operations may be observed, so + // they must be correctly chained. If trapping on FP exceptions is + // disabled, the exceptions can be observed only by functions that read + // exception flags, like 'llvm.get_fpenv' or 'fetestexcept'. It means that + // the order of operations is not significant between barriers. + // + // If trapping is enabled, each operation becomes an implicit observation + // point, so the operations must be sequenced according their original + // source order. + if (!PendingConstrainedFP.empty()) { + assert(PendingConstrainedFPStrict.empty()); + updateRoot(PendingConstrainedFP); + } + // TODO: Add support for trapping-enabled scenarios. 
+ } + return DAG.getRoot(); +} + SDValue SelectionDAGBuilder::getRoot() { // Chain up all pending constrained intrinsics together with all // pending loads, by simply appending them to PendingLoads and @@ -4390,6 +4427,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (int64_t(Offset) >= 0 && NW.hasNoUnsignedSignedWrap())) Flags |= SDNodeFlags::NoUnsignedWrap; + Flags.setInBounds(NW.isInBounds()); N = DAG.getMemBasePlusOffset( N, DAG.getConstant(Offset, dl, N.getValueType()), dl, Flags); @@ -4433,6 +4471,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (Offs.isNonNegative() && NW.hasNoUnsignedSignedWrap())) Flags.setNoUnsignedWrap(true); + Flags.setInBounds(NW.isInBounds()); OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); @@ -4502,6 +4541,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // pointer index type (add nuw). SDNodeFlags AddFlags; AddFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap()); + AddFlags.setInBounds(NW.isInBounds()); N = DAG.getMemBasePlusOffset(N, IdxN, dl, AddFlags); } @@ -8295,6 +8335,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } } +void SelectionDAGBuilder::pushFPOpOutChain(SDValue Result, + fp::ExceptionBehavior EB) { + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + assert(OutChain.getValueType() == MVT::Other); + + // Instead of updating the root immediately, push the produced chain to the + // appropriate list, deferring the update until the root is requested. In this + // case, the nodes from the lists are chained using TokenFactor, indicating + // that the operations are independent. + // + // In particular, the root is updated before any call that might access the + // floating-point environment, except for constrained intrinsics. 
+ switch (EB) { + case fp::ExceptionBehavior::ebMayTrap: + case fp::ExceptionBehavior::ebIgnore: + PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + PendingConstrainedFPStrict.push_back(OutChain); + break; + } +} + void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); @@ -8302,42 +8366,16 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( // We do not need to serialize constrained FP intrinsics against // each other or against (nonvolatile) loads, so they can be // chained like loads. - SDValue Chain = DAG.getRoot(); + fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); + SDValue Chain = getFPOperationRoot(EB); SmallVector<SDValue, 4> Opers; Opers.push_back(Chain); for (unsigned I = 0, E = FPI.getNonMetadataArgCount(); I != E; ++I) Opers.push_back(getValue(FPI.getArgOperand(I))); - auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { - assert(Result.getNode()->getNumValues() == 2); - - // Push node to the appropriate list so that future instructions can be - // chained up correctly. - SDValue OutChain = Result.getValue(1); - switch (EB) { - case fp::ExceptionBehavior::ebIgnore: - // The only reason why ebIgnore nodes still need to be chained is that - // they might depend on the current rounding mode, and therefore must - // not be moved across instruction that may change that mode. - [[fallthrough]]; - case fp::ExceptionBehavior::ebMayTrap: - // These must not be moved across calls or instructions that may change - // floating-point exception masks. - PendingConstrainedFP.push_back(OutChain); - break; - case fp::ExceptionBehavior::ebStrict: - // These must not be moved across calls or instructions that may change - // floating-point exception masks or read floating-point exception flags. - // In addition, they cannot be optimized out even if unused. 
- PendingConstrainedFPStrict.push_back(OutChain); - break; - } - }; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType()); SDVTList VTs = DAG.getVTList(VT, MVT::Other); - fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; if (EB == fp::ExceptionBehavior::ebIgnore) @@ -8361,7 +8399,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { Opers.pop_back(); SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); - pushOutChain(Mul, EB); + pushFPOpOutChain(Mul, EB); Opcode = ISD::STRICT_FADD; Opers.clear(); Opers.push_back(Mul.getValue(1)); @@ -8392,7 +8430,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); - pushOutChain(Result, EB); + pushFPOpOutChain(Result, EB); SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index c7577fa..47e19f7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -195,6 +195,11 @@ private: /// Update root to include all chains from the Pending list. SDValue updateRoot(SmallVectorImpl<SDValue> &Pending); + /// Given a node representing a floating-point operation and its specified + /// exception behavior, this either updates the root or stores the node in + /// a list to be added to chains later. + void pushFPOpOutChain(SDValue Result, fp::ExceptionBehavior EB); + /// A unique monotonically increasing number used to order the SDNodes we /// create. unsigned SDNodeOrder; @@ -300,6 +305,13 @@ public: /// memory node that may need to be ordered after any prior load instructions. 
SDValue getMemoryRoot(); + /// Return the current virtual root of the Selection DAG, flushing + /// PendingConstrainedFP or PendingConstrainedFPStrict items if the new + /// exception behavior (specified by \p EB) differs from that of the pending + /// instructions. This must be done before emitting a constrained FP operation + /// call. + SDValue getFPOperationRoot(fp::ExceptionBehavior EB); + /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict) /// items. This must be done before emitting any call other any other node /// that may need to be ordered after FP instructions due to other side diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 39cbfad..77377d3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -689,6 +689,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasSameSign()) OS << " samesign"; + if (getFlags().hasInBounds()) + OS << " inbounds"; + if (getFlags().hasNonNeg()) OS << " nneg"; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 060b1dd..59798b3 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -2097,6 +2097,11 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { } Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { + // MSVC CRT has a function to validate security cookie. 
+ RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookieLibcall != RTLIB::Unsupported) + return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall)); return nullptr; } diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index b6169e6..10b7238 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -272,6 +272,12 @@ static cl::opt<bool> cl::desc("Split static data sections into hot and cold " "sections using profile information")); +cl::opt<bool> EmitBBHash( + "emit-bb-hash", + cl::desc( + "Emit the hash of basic block in the SHT_LLVM_BB_ADDR_MAP section."), + cl::init(false), cl::Optional); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -1281,6 +1287,8 @@ void TargetPassConfig::addMachinePasses() { // address map (or both). if (TM->getBBSectionsType() != llvm::BasicBlockSection::None || TM->Options.BBAddrMap) { + if (EmitBBHash) + addPass(llvm::createMachineBlockHashInfoPass()); if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) { addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass( TM->getBBSectionsFuncListBuf())); |
