Diffstat (limited to 'llvm/lib')
79 files changed, 1614 insertions, 603 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index f9d7e76..67f526f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1292,12 +1292,10 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { } } -DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, - const DISubprogram *CalleeSP, - bool IsTail, - const MCSymbol *PCAddr, - const MCSymbol *CallAddr, - unsigned CallReg) { +DIE &DwarfCompileUnit::constructCallSiteEntryDIE( + DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, + const MCSymbol *PCAddr, const MCSymbol *CallAddr, unsigned CallReg, + DIType *AllocSiteTy) { // Insert a call site entry DIE within ScopeDIE. DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site), ScopeDIE, nullptr); @@ -1306,7 +1304,7 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, // Indirect call. addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); - } else { + } else if (CalleeSP) { DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); assert(CalleeDIE && "Could not create DIE for call site entry origin"); if (AddLinkageNamesToDeclCallOriginsForTuning(DD) && @@ -1351,6 +1349,9 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_return_pc), PCAddr); } + if (AllocSiteTy) + addType(CallSiteDIE, AllocSiteTy, dwarf::DW_AT_LLVM_alloc_type); + return CallSiteDIE; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 09be22c..c2f6ca0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -289,7 +289,8 @@ public: /// the \p CallReg is set to 0. DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, const MCSymbol *PCAddr, - const MCSymbol *CallAddr, unsigned CallReg); + const MCSymbol *CallAddr, unsigned CallReg, + DIType *AllocSiteTy); /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params /// were collected by the \ref collectCallSiteParameters. /// Note: The order of parameters does not matter, since debuggers recognize diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5ae2d2a..c27f100 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -936,6 +936,8 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, if (MI.hasDelaySlot() && !delaySlotSupported(*&MI)) return; + DIType *AllocSiteTy = dyn_cast_or_null<DIType>(MI.getHeapAllocMarker()); + // If this is a direct call, find the callee's subprogram. // In the case of an indirect call find the register that holds // the callee. 
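For context, a condensed sketch of the flow these two hunks implement together (names mirror the hunks above; no new API is introduced here). The marker originates as !heapallocsite metadata that frontends attach to allocation calls, carrying the allocated DIType; it survives as the MachineInstr heap-alloc marker and is now forwarded into the call-site DIE:

// In constructCallSiteEntryDIEs: pull the marker off the call instruction
// and hand it to the DIE builder, which emits it as DW_AT_LLVM_alloc_type:
//
//   DW_TAG_call_site
//     DW_AT_call_origin       (DIE of the allocation function, if known)
//     DW_AT_LLVM_alloc_type   (DIE of the allocated type)
//
DIType *AllocSiteTy = dyn_cast_or_null<DIType>(MI.getHeapAllocMarker());
// Passed through even when the callee is unknown; per the hunk below, the
// entry is only skipped when there is no callee, no call register, and no
// marker.
CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr,
                             CallReg, AllocSiteTy);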
@@ -950,23 +952,23 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, PhysRegCalleeOperand = PhysRegCalleeOperand && MCOI.OperandType == MCOI::OPERAND_REGISTER; } - if (!CalleeOp.isGlobal() && !PhysRegCalleeOperand) - continue; unsigned CallReg = 0; const DISubprogram *CalleeSP = nullptr; const Function *CalleeDecl = nullptr; if (PhysRegCalleeOperand) { - CallReg = CalleeOp.getReg(); - if (!CallReg) - continue; - } else { + CallReg = CalleeOp.getReg(); // might be zero + } else if (CalleeOp.isGlobal()) { CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); - if (!CalleeDecl || !CalleeDecl->getSubprogram()) - continue; - CalleeSP = CalleeDecl->getSubprogram(); + if (CalleeDecl) + CalleeSP = CalleeDecl->getSubprogram(); // might be nullptr } + // Omit DIE if we can't tell where the call goes *and* we don't want to + // add metadata to it. + if (CalleeSP == nullptr && CallReg == 0 && AllocSiteTy == nullptr) + continue; + // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). bool IsTail = TII->isTailCall(MI); @@ -1000,7 +1002,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, << (IsTail ? " [IsTail]" : "") << "\n"); DIE &CallSiteDIE = CU.constructCallSiteEntryDIE( - ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg); + ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg, AllocSiteTy); // Optionally emit call-site-param debug info. if (emitDebugEntryValues()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d70e96938..7341914 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9390,8 +9390,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { LLVMContext &Context = *DAG.getContext(); unsigned NumStores = Stores.size(); unsigned WideNumBits = NumStores * NarrowNumBits; - EVT WideVT = EVT::getIntegerVT(Context, WideNumBits); - if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64) + if (WideNumBits != 16 && WideNumBits != 32 && WideNumBits != 64) return SDValue(); // Check if all bytes of the source value that we are looking at are stored @@ -9445,7 +9444,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SourceValue = WideVal; // Give up if the source value type is smaller than the store size. - if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits()) + if (SourceValue.getScalarValueSizeInBits() < WideNumBits) return SDValue(); } @@ -9469,6 +9468,8 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { OffsetMap[Offset] = ByteOffsetFromBase; } + EVT WideVT = EVT::getIntegerVT(Context, WideNumBits); + assert(FirstOffset != INT64_MAX && "First byte offset must be set"); assert(FirstStore && "First store must be set"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 71a175d..649a310 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6422,6 +6422,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1.isUndef()) // sext(undef) = 0, because the top bits will all be the same. return getConstant(0, DL, VT); + + // Skip unnecessary sext_inreg pattern: + // (sext (trunc x)) -> x iff the upper bits are all signbits. 
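A standalone illustration of why this fold is sound (hypothetical values; the function below is not from the patch). With VT = i64 and a truncation to i32, NumSignExtBits is 64 - 32 = 32, so the fold fires only when x carries at least 33 sign bits, i.e. when x is already a sign-extended 32-bit value:

#include <cstdint>

// Models (sext (trunc X to i32) to i64): keep the low 32 bits, then
// sign-extend them back to 64 bits.
int64_t sextOfTrunc(int64_t X) {
  return static_cast<int64_t>(static_cast<int32_t>(X));
}

// For any X in [INT32_MIN, INT32_MAX] (at least 33 leading sign bits) the
// round-trip is the identity, e.g. sextOfTrunc(-5) == -5, so the sext/trunc
// pair can be dropped. For X = (int64_t)1 << 32 it is not (only 31 sign
// bits: sextOfTrunc(X) == 0), and the fold must not fire.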
+ if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = N1.getOperand(0); + if (OpOp.getValueType() == VT) { + unsigned NumSignExtBits = + VT.getScalarSizeInBits() - N1.getScalarValueSizeInBits(); + if (ComputeNumSignBits(OpOp) > NumSignExtBits) { + transferDbgValues(N1, OpOp); + return OpOp; + } + } + } break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && N1.getValueType().isInteger() && diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a68f521..e235d14 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5118,6 +5118,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE); } + // fold (setcc (trunc x) c) -> (setcc x c) + if (N0.getOpcode() == ISD::TRUNCATE && + ((N0->getFlags().hasNoUnsignedWrap() && !ISD::isSignedIntSetCC(Cond)) || + (N0->getFlags().hasNoSignedWrap() && + !ISD::isUnsignedIntSetCC(Cond))) && + isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) { + EVT NewVT = N0.getOperand(0).getValueType(); + SDValue NewConst = DAG.getConstant(ISD::isSignedIntSetCC(Cond) + ? C1.sext(NewVT.getSizeInBits()) + : C1.zext(NewVT.getSizeInBits()), + dl, NewVT); + return DAG.getSetCC(dl, VT, N0.getOperand(0), NewConst, Cond); + } + if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; @@ -5654,6 +5668,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return N0; } + // Fold (setcc (trunc x) (trunc y)) -> (setcc x y) + if (N0.getOpcode() == ISD::TRUNCATE && N1.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + ((!ISD::isSignedIntSetCC(Cond) && N0->getFlags().hasNoUnsignedWrap() && + N1->getFlags().hasNoUnsignedWrap()) || + (!ISD::isUnsignedIntSetCC(Cond) && N0->getFlags().hasNoSignedWrap() && + N1->getFlags().hasNoSignedWrap())) && + isTypeDesirableForOp(ISD::SETCC, N0.getOperand(0).getValueType())) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + } + // Could not fold it. 
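As a sanity check of the two trunc folds above (an illustration under the assumed nuw semantics; the helper functions are not from the patch): when the truncation cannot drop set bits, unsigned comparisons in the narrow type agree with comparisons in the wide type against the zero-extended constant:

#include <cstdint>

// Models (setcc (trunc nuw X) 42, ult): the nuw premise is that the
// truncation discards only zero bits, i.e. X < 256 here.
bool narrowUlt(uint32_t X) { return static_cast<uint8_t>(X) < 42; }

// The rewritten form compares in the wide type against the zero-extended
// constant. Whenever the nuw premise holds, both functions agree, which is
// what lets the truncate be dropped; the signed variants rely on nsw and a
// sign-extended constant in the same way.
bool wideUlt(uint32_t X) { return X < 42u; }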
return SDValue(); } diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 6ddb12b..8052773 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -109,6 +109,7 @@ static bool isODRAttribute(uint16_t Attr) { case dwarf::DW_AT_specification: case dwarf::DW_AT_abstract_origin: case dwarf::DW_AT_import: + case dwarf::DW_AT_LLVM_alloc_type: return true; } llvm_unreachable("Improper attribute."); diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp index e0a9758..7dd0611 100644 --- a/llvm/lib/LTO/LTOModule.cpp +++ b/llvm/lib/LTO/LTOModule.cpp @@ -203,8 +203,10 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, // find machine architecture for this module std::string errMsg; const Target *march = TargetRegistry::lookupTarget(Triple, errMsg); - if (!march) + if (!march) { + Context.emitError(errMsg); return make_error_code(object::object_error::arch_not_found); + } // construct LTOModule, hand over ownership of module and target SubtargetFeatures Features; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 93614cd..9a5e070 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -2432,6 +2432,11 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, void MCAsmStreamer::emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { + if (CurFrag) { + MCSection *Sec = getCurrentSectionOnly(); + Sec->setHasInstructions(true); + } + if (MAI->isAIX() && CurFrag) // Now that a machine instruction has been assembled into this section, make // a line entry for any .loc directive that has been seen. diff --git a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp index 89c1df8..07514dd 100644 --- a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp @@ -301,7 +301,7 @@ void MachOWriter::writeSymbolTable() { O.LoadCommands[*O.SymTabCommandIndex] .MachOLoadCommand.symtab_command_data; - char *SymTable = (char *)Buf->getBufferStart() + SymTabCommand.symoff; + char *SymTable = Buf->getBufferStart() + SymTabCommand.symoff; for (auto &Symbol : O.SymTable.Symbols) { SymbolEntry *Sym = Symbol.get(); uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name); @@ -319,7 +319,7 @@ void MachOWriter::writeRebaseInfo() { const MachO::dyld_info_command &DyLdInfoCommand = O.LoadCommands[*O.DyLdInfoCommandIndex] .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.rebase_off; + char *Out = Buf->getBufferStart() + DyLdInfoCommand.rebase_off; assert((DyLdInfoCommand.rebase_size == O.Rebases.Opcodes.size()) && "Incorrect rebase opcodes size"); memcpy(Out, O.Rebases.Opcodes.data(), O.Rebases.Opcodes.size()); @@ -331,7 +331,7 @@ void MachOWriter::writeBindInfo() { const MachO::dyld_info_command &DyLdInfoCommand = O.LoadCommands[*O.DyLdInfoCommandIndex] .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.bind_off; + char *Out = Buf->getBufferStart() + DyLdInfoCommand.bind_off; assert((DyLdInfoCommand.bind_size == O.Binds.Opcodes.size()) && "Incorrect bind opcodes size"); memcpy(Out, O.Binds.Opcodes.data(), O.Binds.Opcodes.size()); @@ -343,7 +343,7 @@ void MachOWriter::writeWeakBindInfo() { const MachO::dyld_info_command &DyLdInfoCommand = O.LoadCommands[*O.DyLdInfoCommandIndex] .MachOLoadCommand.dyld_info_command_data; - char *Out 
= (char *)Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off; + char *Out = Buf->getBufferStart() + DyLdInfoCommand.weak_bind_off; assert((DyLdInfoCommand.weak_bind_size == O.WeakBinds.Opcodes.size()) && "Incorrect weak bind opcodes size"); memcpy(Out, O.WeakBinds.Opcodes.data(), O.WeakBinds.Opcodes.size()); @@ -355,7 +355,7 @@ void MachOWriter::writeLazyBindInfo() { const MachO::dyld_info_command &DyLdInfoCommand = O.LoadCommands[*O.DyLdInfoCommandIndex] .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off; + char *Out = Buf->getBufferStart() + DyLdInfoCommand.lazy_bind_off; assert((DyLdInfoCommand.lazy_bind_size == O.LazyBinds.Opcodes.size()) && "Incorrect lazy bind opcodes size"); memcpy(Out, O.LazyBinds.Opcodes.data(), O.LazyBinds.Opcodes.size()); @@ -367,7 +367,7 @@ void MachOWriter::writeExportInfo() { const MachO::dyld_info_command &DyLdInfoCommand = O.LoadCommands[*O.DyLdInfoCommandIndex] .MachOLoadCommand.dyld_info_command_data; - char *Out = (char *)Buf->getBufferStart() + DyLdInfoCommand.export_off; + char *Out = Buf->getBufferStart() + DyLdInfoCommand.export_off; assert((DyLdInfoCommand.export_size == O.Exports.Trie.size()) && "Incorrect export trie size"); memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); @@ -397,7 +397,7 @@ void MachOWriter::writeLinkData(std::optional<size_t> LCIndex, return; const MachO::linkedit_data_command &LinkEditDataCommand = O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data; - char *Out = (char *)Buf->getBufferStart() + LinkEditDataCommand.dataoff; + char *Out = Buf->getBufferStart() + LinkEditDataCommand.dataoff; assert((LinkEditDataCommand.datasize == LD.Data.size()) && "Incorrect data size"); memcpy(Out, LD.Data.data(), LD.Data.size()); @@ -574,7 +574,7 @@ void MachOWriter::writeExportsTrieData() { const MachO::linkedit_data_command &ExportsTrieCmd = O.LoadCommands[*O.ExportsTrieCommandIndex] .MachOLoadCommand.linkedit_data_command_data; - char *Out = (char *)Buf->getBufferStart() + ExportsTrieCmd.dataoff; + char *Out = Buf->getBufferStart() + ExportsTrieCmd.dataoff; assert((ExportsTrieCmd.datasize == O.Exports.Trie.size()) && "Incorrect export trie size"); memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size()); diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 5db2642..e09dc94 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -3115,7 +3115,7 @@ void ExportEntry::pushNode(uint64_t offset) { } State.ChildCount = *Children; if (State.ChildCount != 0 && Children + 1 >= Trie.end()) { - *E = malformedError("byte for count of childern in export trie data at " + *E = malformedError("byte for count of children in export trie data at " "node: 0x" + Twine::utohexstr(offset) + " extends past end of trie data"); @@ -3157,7 +3157,7 @@ void ExportEntry::pushDownUntilBottom() { } for (const NodeState &node : nodes()) { if (node.Start == Trie.begin() + childNodeIndex){ - *E = malformedError("loop in childern in export trie data at node: 0x" + + *E = malformedError("loop in children in export trie data at node: 0x" + Twine::utohexstr(Top.Start - Trie.begin()) + " back to node: 0x" + Twine::utohexstr(childNodeIndex)); diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 46084c5..3d688a1 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -4949,6 +4949,21 @@ DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) { 
return *this; } +// Returns a result such that: +// 1. abs(Lo) <= ulp(Hi)/2 +// 2. Hi == RTNE(Hi + Lo) +// 3. Hi + Lo == X + Y +// +// Requires that log2(X) >= log2(Y). +static std::pair<APFloat, APFloat> fastTwoSum(APFloat X, APFloat Y) { + if (!X.isFinite()) + return {X, APFloat::getZero(X.getSemantics(), /*Negative=*/false)}; + APFloat Hi = X + Y; + APFloat Delta = Hi - X; + APFloat Lo = Y - Delta; + return {Hi, Lo}; +} + // Implement addition, subtraction, multiplication and division based on: // "Software for Doubled-Precision Floating-Point Computations", // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. @@ -5218,10 +5233,78 @@ DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand, APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) { assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); - auto Ret = Tmp.roundToIntegral(RM); - *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); - return Ret; + const APFloat &Hi = getFirst(); + const APFloat &Lo = getSecond(); + + APFloat RoundedHi = Hi; + const opStatus HiStatus = RoundedHi.roundToIntegral(RM); + + // We can reduce the problem to just the high part if the input: + // 1. Represents a non-finite value. + // 2. Has a component which is zero. + if (!Hi.isFiniteNonZero() || Lo.isZero()) { + Floats[0] = std::move(RoundedHi); + Floats[1].makeZero(/*Neg=*/false); + return HiStatus; + } + + // Adjust `Rounded` in the direction of `TieBreaker` if `ToRound` was at a + // halfway point. + auto RoundToNearestHelper = [](APFloat ToRound, APFloat Rounded, + APFloat TieBreaker) { + // RoundingError tells us which direction we rounded: + // - RoundingError > 0: we rounded up. + // - RoundingError < 0: we rounded down. + // Sterbenz' lemma ensures that RoundingError is exact. + const APFloat RoundingError = Rounded - ToRound; + if (TieBreaker.isNonZero() && + TieBreaker.isNegative() != RoundingError.isNegative() && + abs(RoundingError).isExactlyValue(0.5)) + Rounded.add( + APFloat::getOne(Rounded.getSemantics(), TieBreaker.isNegative()), + rmNearestTiesToEven); + return Rounded; + }; + + // Case 1: Hi is not an integer. + // Special cases are for rounding modes that are sensitive to ties. + if (RoundedHi != Hi) { + // We need to consider the case where Hi was between two integers and the + // rounding mode broke the tie when, in fact, Lo may have had a different + // sign than Hi. + if (RM == rmNearestTiesToAway || RM == rmNearestTiesToEven) + RoundedHi = RoundToNearestHelper(Hi, RoundedHi, Lo); + + Floats[0] = std::move(RoundedHi); + Floats[1].makeZero(/*Neg=*/false); + return HiStatus; + } + + // Case 2: Hi is an integer. + // Special cases are for rounding modes which are rounding towards or away from zero. + RoundingMode LoRoundingMode; + if (RM == rmTowardZero) + // When our input is positive, we want the Lo component rounded toward + // negative infinity to get the smallest result magnitude. Likewise, + // negative inputs want the Lo component rounded toward positive infinity. + LoRoundingMode = isNegative() ? rmTowardPositive : rmTowardNegative; + else + LoRoundingMode = RM; + + APFloat RoundedLo = Lo; + const opStatus LoStatus = RoundedLo.roundToIntegral(LoRoundingMode); + if (LoRoundingMode == rmNearestTiesToAway) + // We need to consider the case where Lo was between two integers and the + // rounding mode broke the tie when, in fact, Hi may have had a different + // sign than Lo. 
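A worked instance of the tie-breaking problem this helper handles (illustrative values, not from the patch): take Hi = 2.0 (already integral), Lo = -0.5, RM = rmNearestTiesToAway. The exact value Hi + Lo = 1.5 must round to 2.0, but rounding Lo in isolation gives -0.5 -> -1.0 (the tie is broken away from Lo's own zero), which would yield a total of 1.0. The helper computes RoundingError = -1.0 - (-0.5) = -0.5, sees a halfway case (|RoundingError| == 0.5) that was rounded against the sign of TieBreaker = Hi, and adds +1.0, producing RoundedLo = 0.0 and the correct total 2.0.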
+ RoundedLo = RoundToNearestHelper(Lo, RoundedLo, Hi); + + // We must ensure that the final result has no overlap between the two APFloat values. + std::tie(RoundedHi, RoundedLo) = fastTwoSum(RoundedHi, RoundedLo); + + Floats[0] = std::move(RoundedHi); + Floats[1] = std::move(RoundedLo); + return LoStatus; } void DoubleAPFloat::changeSign() { diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index c369916..30eae6e 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -93,9 +93,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) { } } -SMLoc TGLexer::getLoc() const { - return SMLoc::getFromPointer(TokStart); -} +SMLoc TGLexer::getLoc() const { return SMLoc::getFromPointer(TokStart); } SMRange TGLexer::getLocRange() const { return {getLoc(), SMLoc::getFromPointer(CurPtr)}; @@ -162,16 +160,13 @@ int TGLexer::getNextChar() { // Handle the newline character by ignoring it and incrementing the line // count. However, be careful about 'dos style' files with \n\r in them. // Only treat a \n\r or \r\n as a single line. - if ((*CurPtr == '\n' || (*CurPtr == '\r')) && - *CurPtr != CurChar) - ++CurPtr; // Eat the two char newline sequence. + if ((*CurPtr == '\n' || (*CurPtr == '\r')) && *CurPtr != CurChar) + ++CurPtr; // Eat the two char newline sequence. return '\n'; } } -int TGLexer::peekNextChar(int Index) const { - return *(CurPtr + Index); -} +int TGLexer::peekNextChar(int Index) const { return *(CurPtr + Index); } tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { while (true) { @@ -367,7 +362,9 @@ tgtok::TokKind TGLexer::LexString() { ++CurPtr; switch (*CurPtr) { - case '\\': case '\'': case '"': + case '\\': + case '\'': + case '"': // These turn into their literal character. CurStrVal += *CurPtr++; break; @@ -421,7 +418,7 @@ tgtok::TokKind TGLexer::LexIdentifier() { ++CurPtr; // Check to see if this identifier is a reserved keyword. - StringRef Str(IdentStart, CurPtr-IdentStart); + StringRef Str(IdentStart, CurPtr - IdentStart); tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str) .Case("int", tgtok::Int) @@ -454,14 +451,15 @@ tgtok::TokKind TGLexer::LexIdentifier() { // A couple of tokens require special processing. switch (Kind) { - case tgtok::Include: - if (LexInclude()) return tgtok::Error; - return Lex(); - case tgtok::Id: - CurStrVal.assign(Str.begin(), Str.end()); - break; - default: - break; + case tgtok::Include: + if (LexInclude()) + return tgtok::Error; + return Lex(); + case tgtok::Id: + CurStrVal.assign(Str.begin(), Str.end()); + break; + default: + break; } return Kind; @@ -472,7 +470,8 @@ tgtok::TokKind TGLexer::LexIdentifier() { bool TGLexer::LexInclude() { // The token after the include must be a string. tgtok::TokKind Tok = LexToken(); - if (Tok == tgtok::Error) return true; + if (Tok == tgtok::Error) + return true; if (Tok != tgtok::StrVal) { PrintError(getLoc(), "expected filename after include"); return true; @@ -501,7 +500,7 @@ bool TGLexer::LexInclude() { /// SkipBCPLComment - Skip over the comment by finding the next CR or LF. /// Or we may end up at the end of the buffer. void TGLexer::SkipBCPLComment() { - ++CurPtr; // skip the second slash. + ++CurPtr; // Skip the second slash. auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data()); CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos; } @@ -509,7 +508,7 @@ void TGLexer::SkipBCPLComment() { /// SkipCComment - This skips C-style /**/ comments. 
The only difference from C /// is that we allow nesting. bool TGLexer::SkipCComment() { - ++CurPtr; // skip the star. + ++CurPtr; // Skip the star. unsigned CommentDepth = 1; while (true) { @@ -520,15 +519,17 @@ bool TGLexer::SkipCComment() { return true; case '*': // End of the comment? - if (CurPtr[0] != '/') break; + if (CurPtr[0] != '/') + break; - ++CurPtr; // End the */. + ++CurPtr; // End the */. if (--CommentDepth == 0) return false; break; case '/': // Start of a nested comment? - if (CurPtr[0] != '*') break; + if (CurPtr[0] != '*') + break; ++CurPtr; ++CommentDepth; break; @@ -608,14 +609,17 @@ tgtok::TokKind TGLexer::LexBracket() { const char *CodeStart = CurPtr; while (true) { int Char = getNextChar(); - if (Char == EOF) break; + if (Char == EOF) + break; - if (Char != '}') continue; + if (Char != '}') + continue; Char = getNextChar(); - if (Char == EOF) break; + if (Char == EOF) + break; if (Char == ']') { - CurStrVal.assign(CodeStart, CurPtr-2); + CurStrVal.assign(CodeStart, CurPtr - 2); return tgtok::CodeFragment; } } diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 5725e39..753470d 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -216,13 +216,9 @@ private: public: TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros); - tgtok::TokKind Lex() { - return CurCode = LexToken(CurPtr == CurBuf.begin()); - } + tgtok::TokKind Lex() { return CurCode = LexToken(CurPtr == CurBuf.begin()); } - const DependenciesSetTy &getDependencies() const { - return Dependencies; - } + const DependenciesSetTy &getDependencies() const { return Dependencies; } tgtok::TokKind getCode() const { return CurCode; } diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 81b61b1..0c6add5 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -99,11 +99,11 @@ static void checkConcrete(Record &R) { if (const Init *V = RV.getValue()) { bool Ok = isa<BitsInit>(V) ? checkBitsConcrete(R, RV) : V->isConcrete(); if (!Ok) { - PrintError(R.getLoc(), - Twine("Initializer of '") + RV.getNameInitAsString() + - "' in '" + R.getNameInitAsString() + - "' could not be fully resolved: " + - RV.getValue()->getAsString()); + PrintError(R.getLoc(), Twine("Initializer of '") + + RV.getNameInitAsString() + "' in '" + + R.getNameInitAsString() + + "' could not be fully resolved: " + + RV.getValue()->getAsString()); } } } @@ -218,9 +218,10 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) { // The value already exists in the class, treat this as a set. 
if (ERV->setValue(RV.getValue())) return Error(Loc, "New definition of '" + RV.getName() + "' of type '" + - RV.getType()->getAsString() + "' is incompatible with " + - "previous definition of type '" + - ERV->getType()->getAsString() + "'"); + RV.getType()->getAsString() + + "' is incompatible with " + + "previous definition of type '" + + ERV->getType()->getAsString() + "'"); } else { CurRec->addValue(RV); } @@ -232,14 +233,16 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) { bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const Init *ValName, ArrayRef<unsigned> BitList, const Init *V, bool AllowSelfAssignment, bool OverrideDefLoc) { - if (!V) return false; + if (!V) + return false; - if (!CurRec) CurRec = &CurMultiClass->Rec; + if (!CurRec) + CurRec = &CurMultiClass->Rec; RecordVal *RV = CurRec->getValue(ValName); if (!RV) - return Error(Loc, "Value '" + ValName->getAsUnquotedString() + - "' unknown!"); + return Error(Loc, + "Value '" + ValName->getAsUnquotedString() + "' unknown!"); // Do not allow assignments like 'X = X'. This will just cause infinite loops // in the resolution machinery. @@ -254,7 +257,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const Init *ValName, const auto *CurVal = dyn_cast<BitsInit>(RV->getValue()); if (!CurVal) return Error(Loc, "Value '" + ValName->getAsUnquotedString() + - "' is not a bits type"); + "' is not a bits type"); // Convert the incoming value to a bits type of the appropriate size... const Init *BI = V->getCastTo(BitsRecTy::get(Records, BitList.size())); @@ -268,7 +271,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const Init *ValName, unsigned Bit = BitList[i]; if (NewBits[Bit]) return Error(Loc, "Cannot set bit #" + Twine(Bit) + " of value '" + - ValName->getAsUnquotedString() + "' more than once"); + ValName->getAsUnquotedString() + + "' more than once"); NewBits[Bit] = BI->getBit(i); } @@ -283,7 +287,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const Init *ValName, std::string InitType; if (const auto *BI = dyn_cast<BitsInit>(V)) InitType = (Twine("' of type bit initializer with length ") + - Twine(BI->getNumBits())).str(); + Twine(BI->getNumBits())) + .str(); else if (const auto *TI = dyn_cast<TypedInit>(V)) InitType = (Twine("' of type '") + TI->getType()->getAsString() + "'").str(); @@ -416,9 +421,8 @@ bool TGParser::addEntry(RecordsEntry E) { /// /// The resulting records are stored in \p Dest if non-null. Otherwise, they /// are added to the global record keeper. 
-bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs, - bool Final, std::vector<RecordsEntry> *Dest, - SMLoc *Loc) { +bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs, bool Final, + std::vector<RecordsEntry> *Dest, SMLoc *Loc) { MapResolver R; for (const auto &S : Substs) @@ -437,28 +441,28 @@ bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs, R.setFinal(true); const Init *LHS = OldLHS->resolveReferences(R); if (LHS == OldLHS) { - PrintError(Loop.Loc, - Twine("unable to resolve if condition '") + - LHS->getAsString() + "' at end of containing scope"); + PrintError(Loop.Loc, Twine("unable to resolve if condition '") + + LHS->getAsString() + + "' at end of containing scope"); return true; } const Init *MHS = TI->getMHS(); const Init *RHS = TI->getRHS(); List = TernOpInit::get(TernOpInit::IF, LHS, MHS, RHS, TI->getType()) - ->Fold(nullptr); + ->Fold(nullptr); } const auto *LI = dyn_cast<ListInit>(List); if (!LI) { if (!Final) { - Dest->emplace_back(std::make_unique<ForeachLoop>(Loop.Loc, Loop.IterVar, - List)); + Dest->emplace_back( + std::make_unique<ForeachLoop>(Loop.Loc, Loop.IterVar, List)); return resolve(Loop.Entries, Substs, Final, &Dest->back().Loop->Entries, Loc); } PrintError(Loop.Loc, Twine("attempting to loop over '") + - List->getAsString() + "', expected a list"); + List->getAsString() + "', expected a list"); return true; } @@ -571,7 +575,7 @@ bool TGParser::addDefOne(std::unique_ptr<Record> Rec) { if (!I->getType()->typeIsA(Defset->EltTy)) { PrintError(Rec->getLoc(), Twine("adding record of incompatible type '") + I->getType()->getAsString() + - "' to defset"); + "' to defset"); PrintNote(Defset->Loc, "location of defset declaration"); return true; } @@ -751,8 +755,8 @@ MultiClass *TGParser::ParseMultiClassID() { /// SubClassRef ::= ClassID /// SubClassRef ::= ClassID '<' ArgValueList '>' /// -SubClassReference TGParser:: -ParseSubClassReference(Record *CurRec, bool isDefm) { +SubClassReference TGParser::ParseSubClassReference(Record *CurRec, + bool isDefm) { SubClassReference Result; Result.RefRange.Start = Lex.getLoc(); @@ -762,7 +766,8 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { } else { Result.Rec = ParseClassID(); } - if (!Result.Rec) return Result; + if (!Result.Rec) + return Result; // If there is no template arg list, we're done. if (!consume(tgtok::less)) { @@ -793,13 +798,14 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { /// SubMultiClassRef ::= MultiClassID /// SubMultiClassRef ::= MultiClassID '<' ArgValueList '>' /// -SubMultiClassReference TGParser:: -ParseSubMultiClassReference(MultiClass *CurMC) { +SubMultiClassReference +TGParser::ParseSubMultiClassReference(MultiClass *CurMC) { SubMultiClassReference Result; Result.RefRange.Start = Lex.getLoc(); Result.MC = ParseMultiClassID(); - if (!Result.MC) return Result; + if (!Result.MC) + return Result; // If there is no template arg list, we're done. if (!consume(tgtok::less)) { @@ -1049,7 +1055,8 @@ bool TGParser::ParseOptionalRangeList(SmallVectorImpl<unsigned> &Ranges) { // Parse the range list. ParseRangeList(Ranges); - if (Ranges.empty()) return true; + if (Ranges.empty()) + return true; if (!consume(tgtok::greater)) { TokError("expected '>' at end of range list"); @@ -1068,7 +1075,8 @@ bool TGParser::ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges) { // Parse the range list. 
ParseRangeList(Ranges); - if (Ranges.empty()) return true; + if (Ranges.empty()) + return true; if (!consume(tgtok::r_brace)) { TokError("expected '}' at end of bit list"); @@ -1090,7 +1098,9 @@ bool TGParser::ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges) { /// const RecTy *TGParser::ParseType() { switch (Lex.getCode()) { - default: TokError("Unknown token when expecting a type"); return nullptr; + default: + TokError("Unknown token when expecting a type"); + return nullptr; case tgtok::String: case tgtok::Code: Lex.Lex(); @@ -1129,7 +1139,7 @@ const RecTy *TGParser::ParseType() { TokError("expected '>' at end of bits<n> type"); return nullptr; } - Lex.Lex(); // Eat '>' + Lex.Lex(); // Eat '>' return BitsRecTy::get(Records, Val); } case tgtok::List: { @@ -1137,9 +1147,10 @@ const RecTy *TGParser::ParseType() { TokError("expected '<' after list type"); return nullptr; } - Lex.Lex(); // Eat '<' + Lex.Lex(); // Eat '<' const RecTy *SubType = ParseType(); - if (!SubType) return nullptr; + if (!SubType) + return nullptr; if (!consume(tgtok::greater)) { TokError("expected '>' at end of list<ty> type"); @@ -1206,9 +1217,10 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { const RecTy *Type = nullptr; switch (Lex.getCode()) { - default: llvm_unreachable("Unhandled code!"); + default: + llvm_unreachable("Unhandled code!"); case tgtok::XCast: - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation Code = UnOpInit::CAST; Type = ParseOperatorType(); @@ -1235,7 +1247,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { Type = StringRecTy::get(Records); break; case tgtok::XNOT: - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation Code = UnOpInit::NOT; Type = IntRecTy::get(Records); break; @@ -1245,16 +1257,16 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { Type = IntRecTy::get(Records); // Bogus type used here. 
break; case tgtok::XLOG2: - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation Code = UnOpInit::LOG2; Type = IntRecTy::get(Records); break; case tgtok::XHead: - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation Code = UnOpInit::HEAD; break; case tgtok::XTail: - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation Code = UnOpInit::TAIL; break; case tgtok::XSize: @@ -1263,12 +1275,12 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { Type = IntRecTy::get(Records); break; case tgtok::XEmpty: - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation Code = UnOpInit::EMPTY; Type = IntRecTy::get(Records); break; case tgtok::XGetDagOp: - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation if (Lex.getCode() == tgtok::less) { // Parse an optional type suffix, so that you can say // !getdagop<BaseClass>(someDag) as a shorthand for @@ -1306,7 +1318,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { } const Init *LHS = ParseValue(CurRec); - if (!LHS) return nullptr; + if (!LHS) + return nullptr; if (Code == UnOpInit::EMPTY || Code == UnOpInit::SIZE) { const auto *LHSl = dyn_cast<ListInit>(LHS); @@ -1314,12 +1327,14 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { const auto *LHSd = dyn_cast<DagInit>(LHS); const auto *LHSt = dyn_cast<TypedInit>(LHS); if (!LHSl && !LHSs && !LHSd && !LHSt) { - TokError("expected string, list, or dag type argument in unary operator"); + TokError( + "expected string, list, or dag type argument in unary operator"); return nullptr; } if (LHSt) { if (!isa<ListRecTy, StringRecTy, DagRecTy>(LHSt->getType())) { - TokError("expected string, list, or dag type argument in unary operator"); + TokError( + "expected string, list, or dag type argument in unary operator"); return nullptr; } } @@ -1525,39 +1540,84 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { case tgtok::XSetDagOpName: { // Value ::= !binop '(' Value ',' Value ')' tgtok::TokKind OpTok = Lex.getCode(); SMLoc OpLoc = Lex.getLoc(); - Lex.Lex(); // eat the operation + Lex.Lex(); // eat the operation BinOpInit::BinaryOp Code; switch (OpTok) { - default: llvm_unreachable("Unhandled code!"); - case tgtok::XConcat: Code = BinOpInit::CONCAT; break; + default: + llvm_unreachable("Unhandled code!"); + case tgtok::XConcat: + Code = BinOpInit::CONCAT; + break; case tgtok::XMatch: Code = BinOpInit::MATCH; break; - case tgtok::XADD: Code = BinOpInit::ADD; break; - case tgtok::XSUB: Code = BinOpInit::SUB; break; - case tgtok::XMUL: Code = BinOpInit::MUL; break; - case tgtok::XDIV: Code = BinOpInit::DIV; break; - case tgtok::XAND: Code = BinOpInit::AND; break; - case tgtok::XOR: Code = BinOpInit::OR; break; - case tgtok::XXOR: Code = BinOpInit::XOR; break; - case tgtok::XSRA: Code = BinOpInit::SRA; break; - case tgtok::XSRL: Code = BinOpInit::SRL; break; - case tgtok::XSHL: Code = BinOpInit::SHL; break; - case tgtok::XEq: Code = BinOpInit::EQ; break; - case tgtok::XNe: Code = BinOpInit::NE; break; - case tgtok::XLe: Code = BinOpInit::LE; break; - case tgtok::XLt: Code = BinOpInit::LT; break; - case tgtok::XGe: Code = BinOpInit::GE; break; - case tgtok::XGt: Code = BinOpInit::GT; break; - case tgtok::XListConcat: Code = BinOpInit::LISTCONCAT; break; - case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break; + case tgtok::XADD: + Code = BinOpInit::ADD; + break; + case tgtok::XSUB: + Code = BinOpInit::SUB; + break; + case tgtok::XMUL: + Code = 
BinOpInit::MUL; + break; + case tgtok::XDIV: + Code = BinOpInit::DIV; + break; + case tgtok::XAND: + Code = BinOpInit::AND; + break; + case tgtok::XOR: + Code = BinOpInit::OR; + break; + case tgtok::XXOR: + Code = BinOpInit::XOR; + break; + case tgtok::XSRA: + Code = BinOpInit::SRA; + break; + case tgtok::XSRL: + Code = BinOpInit::SRL; + break; + case tgtok::XSHL: + Code = BinOpInit::SHL; + break; + case tgtok::XEq: + Code = BinOpInit::EQ; + break; + case tgtok::XNe: + Code = BinOpInit::NE; + break; + case tgtok::XLe: + Code = BinOpInit::LE; + break; + case tgtok::XLt: + Code = BinOpInit::LT; + break; + case tgtok::XGe: + Code = BinOpInit::GE; + break; + case tgtok::XGt: + Code = BinOpInit::GT; + break; + case tgtok::XListConcat: + Code = BinOpInit::LISTCONCAT; + break; + case tgtok::XListSplat: + Code = BinOpInit::LISTSPLAT; + break; case tgtok::XListRemove: Code = BinOpInit::LISTREMOVE; break; - case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break; - case tgtok::XInterleave: Code = BinOpInit::INTERLEAVE; break; - case tgtok::XSetDagOp: Code = BinOpInit::SETDAGOP; break; + case tgtok::XStrConcat: + Code = BinOpInit::STRCONCAT; + break; + case tgtok::XInterleave: + Code = BinOpInit::INTERLEAVE; + break; + case tgtok::XSetDagOp: + Code = BinOpInit::SETDAGOP; + break; case tgtok::XSetDagOpName: Code = BinOpInit::SETDAGOPNAME; break; @@ -1642,9 +1702,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { } if (Type && ItemType && !Type->typeIsConvertibleTo(ItemType)) { - Error(OpLoc, Twine("expected value of type '") + - ItemType->getAsString() + "', got '" + - Type->getAsString() + "'"); + Error(OpLoc, Twine("expected value of type '") + ItemType->getAsString() + + "', got '" + Type->getAsString() + "'"); return nullptr; } @@ -1660,7 +1719,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { for (;;) { SMLoc InitLoc = Lex.getLoc(); InitList.push_back(ParseValue(CurRec, ArgType)); - if (!InitList.back()) return nullptr; + if (!InitList.back()) + return nullptr; const auto *InitListBack = dyn_cast<TypedInit>(InitList.back()); if (!InitListBack) { @@ -1678,7 +1738,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { case BinOpInit::LISTCONCAT: if (!isa<ListRecTy>(ArgType)) { Error(InitLoc, Twine("expected a list, got value of type '") + - ArgType->getAsString() + "'"); + ArgType->getAsString() + "'"); return nullptr; } break; @@ -1747,9 +1807,10 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { if (ArgType != StringRecTy::get(Records)->getListTy() && !ArgType->typeIsConvertibleTo( IntRecTy::get(Records)->getListTy())) { - Error(InitLoc, Twine("expected list of string, int, bits, or bit; " - "got value of type '") + - ArgType->getAsString() + "'"); + Error(InitLoc, + Twine("expected list of string, int, bits, or bit; " + "got value of type '") + + ArgType->getAsString() + "'"); return nullptr; } break; @@ -1761,11 +1822,12 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { return nullptr; } break; - default: ; + default:; } ArgType = nullptr; // Broken invariant: types not identical. break; - default: llvm_unreachable("other ops have fixed argument types"); + default: + llvm_unreachable("other ops have fixed argument types"); } } else { @@ -1966,7 +2028,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { tgtok::TokKind LexCode = Lex.getCode(); Lex.Lex(); // Eat the operation. 
switch (LexCode) { - default: llvm_unreachable("Unhandled code!"); + default: + llvm_unreachable("Unhandled code!"); case tgtok::XDag: Code = TernOpInit::DAG; Type = DagRecTy::get(Records); @@ -1995,7 +2058,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { } const Init *LHS = ParseValue(CurRec); - if (!LHS) return nullptr; + if (!LHS) + return nullptr; if (!consume(tgtok::comma)) { TokError("expected ',' in ternary operator"); @@ -2023,7 +2087,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { } switch (LexCode) { - default: llvm_unreachable("Unhandled code!"); + default: + llvm_unreachable("Unhandled code!"); case tgtok::XDag: { const auto *MHSt = dyn_cast<TypedInit>(MHS); if (!MHSt && !isa<UnsetInit>(MHS)) { @@ -2231,7 +2296,8 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { std::unique_ptr<Record> ParseRecTmp; Record *ParseRec = CurRec; if (!ParseRec) { - ParseRecTmp = std::make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records); + ParseRecTmp = + std::make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records); ParseRec = ParseRecTmp.get(); } @@ -2347,9 +2413,8 @@ const Init *TGParser::ParseOperationSubstr(Record *CurRec, } if (ItemType && !Type->typeIsConvertibleTo(ItemType)) { - Error(RHSLoc, Twine("expected value of type '") + - ItemType->getAsString() + "', got '" + - Type->getAsString() + "'"); + Error(RHSLoc, Twine("expected value of type '") + ItemType->getAsString() + + "', got '" + Type->getAsString() + "'"); } const auto *LHSt = dyn_cast<TypedInit>(LHS); @@ -2436,9 +2501,8 @@ const Init *TGParser::ParseOperationFind(Record *CurRec, } if (ItemType && !Type->typeIsConvertibleTo(ItemType)) { - Error(RHSLoc, Twine("expected value of type '") + - ItemType->getAsString() + "', got '" + - Type->getAsString() + "'"); + Error(RHSLoc, Twine("expected value of type '") + ItemType->getAsString() + + "', got '" + Type->getAsString() + "'"); } const auto *LHSt = dyn_cast<TypedInit>(LHS); @@ -2540,10 +2604,9 @@ const Init *TGParser::ParseOperationForEachFilter(Record *CurRec, ? 
OutListTy->getElementType() : IntRecTy::get(Records); } else { - Error(OpLoc, - "expected value of type '" + - Twine(ItemType->getAsString()) + - "', but got list type"); + Error(OpLoc, "expected value of type '" + + Twine(ItemType->getAsString()) + + "', but got list type"); return nullptr; } } @@ -2554,9 +2617,8 @@ const Init *TGParser::ParseOperationForEachFilter(Record *CurRec, } InEltType = InDagTy; if (ItemType && !isa<DagRecTy>(ItemType)) { - Error(OpLoc, - "expected value of type '" + Twine(ItemType->getAsString()) + - "', but got dag type"); + Error(OpLoc, "expected value of type '" + Twine(ItemType->getAsString()) + + "', but got dag type"); return nullptr; } IsDAG = true; @@ -2610,7 +2672,7 @@ const Init *TGParser::ParseOperationForEachFilter(Record *CurRec, const Init *TGParser::ParseOperationCond(Record *CurRec, const RecTy *ItemType) { - Lex.Lex(); // eat the operation 'cond' + Lex.Lex(); // eat the operation 'cond' if (!consume(tgtok::l_paren)) { TokError("expected '(' after !cond operator"); @@ -2649,7 +2711,8 @@ const Init *TGParser::ParseOperationCond(Record *CurRec, } if (Case.size() < 1) { - TokError("there should be at least 1 'condition : value' in the !cond operator"); + TokError( + "there should be at least 1 'condition : value' in the !cond operator"); return nullptr; } @@ -2672,7 +2735,7 @@ const Init *TGParser::ParseOperationCond(Record *CurRec, const RecTy *RType = resolveTypes(Type, VTy); if (!RType) { TokError(Twine("inconsistent types '") + Type->getAsString() + - "' and '" + VTy->getAsString() + "' for !cond"); + "' and '" + VTy->getAsString() + "' for !cond"); return nullptr; } Type = RType; @@ -2724,7 +2787,9 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, return ParseOperation(CurRec, ItemType); switch (Code) { - default: TokError("Unknown or reserved token when parsing a value"); break; + default: + TokError("Unknown or reserved token when parsing a value"); + break; case tgtok::TrueVal: R = IntInit::get(Records, 1); @@ -2740,7 +2805,7 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, break; case tgtok::BinaryIntVal: { auto BinaryVal = Lex.getCurBinaryIntVal(); - SmallVector<Init*, 16> Bits(BinaryVal.second); + SmallVector<Init *, 16> Bits(BinaryVal.second); for (unsigned i = 0, e = BinaryVal.second; i != e; ++i) Bits[i] = BitInit::get(Records, BinaryVal.first & (1LL << i)); R = BitsInit::get(Records, Bits); @@ -2803,14 +2868,15 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, Class->appendReferenceLoc(NameLoc); return VarDefInit::get(NameLoc.Start, Class, Args)->Fold(); } - case tgtok::l_brace: { // Value ::= '{' ValueList '}' + case tgtok::l_brace: { // Value ::= '{' ValueList '}' SMLoc BraceLoc = Lex.getLoc(); Lex.Lex(); // eat the '{' SmallVector<const Init *, 16> Vals; if (Lex.getCode() != tgtok::r_brace) { ParseValueList(Vals, CurRec); - if (Vals.empty()) return nullptr; + if (Vals.empty()) + return nullptr; } if (!consume(tgtok::r_brace)) { TokError("expected '}' at end of bit list value"); @@ -2845,7 +2911,7 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, const Init *Bit = Vals[i]->getCastTo(BitRecTy::get(Records)); if (!Bit) { Error(BraceLoc, "Element #" + Twine(i) + " (" + Vals[i]->getAsString() + - ") is not convertable to a bit"); + ") is not convertable to a bit"); return nullptr; } NewBits.push_back(Bit); @@ -2853,8 +2919,8 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, 
std::reverse(NewBits.begin(), NewBits.end()); return BitsInit::get(Records, NewBits); } - case tgtok::l_square: { // Value ::= '[' ValueList ']' - Lex.Lex(); // eat the '[' + case tgtok::l_square: { // Value ::= '[' ValueList ']' + Lex.Lex(); // eat the '[' SmallVector<const Init *, 16> Vals; const RecTy *DeducedEltTy = nullptr; @@ -2873,7 +2939,8 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, if (Lex.getCode() != tgtok::r_square) { ParseValueList(Vals, CurRec, GivenListTy ? GivenListTy->getElementType() : nullptr); - if (Vals.empty()) return nullptr; + if (Vals.empty()) + return nullptr; } if (!consume(tgtok::r_square)) { TokError("expected ']' at end of list value"); @@ -2946,7 +3013,7 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, } case tgtok::l_paren: { // Value ::= '(' IDValue DagArgList ')' // Value ::= '(' '[' ValueList ']' DagArgList ')' - Lex.Lex(); // eat the '(' + Lex.Lex(); // eat the '(' if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast && Lex.getCode() != tgtok::question && Lex.getCode() != tgtok::XGetDagOp && Lex.getCode() != tgtok::l_square) { @@ -2955,7 +3022,8 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, } const Init *Operator = ParseValue(CurRec); - if (!Operator) return nullptr; + if (!Operator) + return nullptr; // If the operator name is present, parse it. const StringInit *OperatorName = nullptr; @@ -2965,13 +3033,14 @@ const Init *TGParser::ParseSimpleValue(Record *CurRec, const RecTy *ItemType, return nullptr; } OperatorName = StringInit::get(Records, Lex.getCurStrVal()); - Lex.Lex(); // eat the VarName. + Lex.Lex(); // eat the VarName. } SmallVector<std::pair<const Init *, const StringInit *>, 8> DagArgs; if (Lex.getCode() != tgtok::r_paren) { ParseDagArgList(DagArgs, CurRec); - if (DagArgs.empty()) return nullptr; + if (DagArgs.empty()) + return nullptr; } if (!consume(tgtok::r_paren)) { @@ -2997,12 +3066,14 @@ const Init *TGParser::ParseValue(Record *CurRec, const RecTy *ItemType, IDParseMode Mode) { SMLoc LHSLoc = Lex.getLoc(); const Init *Result = ParseSimpleValue(CurRec, ItemType, Mode); - if (!Result) return nullptr; + if (!Result) + return nullptr; // Parse the suffixes now if present. while (true) { switch (Lex.getCode()) { - default: return Result; + default: + return Result; case tgtok::l_brace: { if (Mode == ParseNameMode) // This is the beginning of the object body. @@ -3012,7 +3083,8 @@ const Init *TGParser::ParseValue(Record *CurRec, const RecTy *ItemType, Lex.Lex(); // eat the '{' SmallVector<unsigned, 16> Ranges; ParseRangeList(Ranges); - if (Ranges.empty()) return nullptr; + if (Ranges.empty()) + return nullptr; // Reverse the bitlist. std::reverse(Ranges.begin(), Ranges.end()); @@ -3095,7 +3167,7 @@ const Init *TGParser::ParseValue(Record *CurRec, const RecTy *ItemType, } Result = FieldInit::get(Result, FieldName)->Fold(CurRec); - Lex.Lex(); // eat field name + Lex.Lex(); // eat field name break; } @@ -3109,7 +3181,7 @@ const Init *TGParser::ParseValue(Record *CurRec, const RecTy *ItemType, // Check if it's a 'listA # listB' if (isa<ListRecTy>(LHS->getType())) { - Lex.Lex(); // Eat the '#'. + Lex.Lex(); // Eat the '#'. assert(Mode == ParseValueMode && "encountered paste of lists in name"); @@ -3145,7 +3217,7 @@ const Init *TGParser::ParseValue(Record *CurRec, const RecTy *ItemType, const TypedInit *RHS = nullptr; - Lex.Lex(); // Eat the '#'. + Lex.Lex(); // Eat the '#'. 
switch (Lex.getCode()) { case tgtok::colon: case tgtok::semi: @@ -3223,7 +3295,7 @@ void TGParser::ParseDagArgList( return; } VarName = StringInit::get(Records, Lex.getCurStrVal()); - Lex.Lex(); // eat the VarName. + Lex.Lex(); // eat the VarName. } Result.emplace_back(Val, VarName); @@ -3351,7 +3423,8 @@ const Init *TGParser::ParseDeclaration(Record *CurRec, bool HasField = consume(tgtok::Field); const RecTy *Type = ParseType(); - if (!Type) return nullptr; + if (!Type) + return nullptr; if (Lex.getCode() != tgtok::Id) { TokError("Expected identifier in declaration"); @@ -3440,7 +3513,7 @@ TGParser::ParseForeachDeclaration(const Init *&ForeachListValue) { switch (Lex.getCode()) { case tgtok::l_brace: { // '{' RangeList '}' - Lex.Lex(); // eat the '{' + Lex.Lex(); // eat the '{' ParseRangeList(Ranges); if (!consume(tgtok::r_brace)) { TokError("expected '}' at end of bit range list"); @@ -3471,13 +3544,12 @@ TGParser::ParseForeachDeclaration(const Init *&ForeachListValue) { Error(ValueLoc, "expected a list, got '" + I->getAsString() + "'"); if (CurMultiClass) { PrintNote({}, "references to multiclass template arguments cannot be " - "resolved at this time"); + "resolved at this time"); } return nullptr; } } - if (!Ranges.empty()) { assert(!IterType && "Type already initialized?"); IterType = IntRecTy::get(Records); @@ -3516,7 +3588,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) { while (consume(tgtok::comma)) { // Read the following declarations. SMLoc Loc = Lex.getLoc(); - TemplArg = ParseDeclaration(CurRec, true/*templateargs*/); + TemplArg = ParseDeclaration(CurRec, true /*templateargs*/); if (!TemplArg) return true; @@ -3565,7 +3637,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) { SMLoc IdLoc = Lex.getLoc(); const StringInit *FieldName = StringInit::get(Records, Lex.getCurStrVal()); - Lex.Lex(); // eat the field name. + Lex.Lex(); // eat the field name. SmallVector<unsigned, 16> BitList; if (ParseOptionalBitList(BitList)) @@ -3587,7 +3659,8 @@ bool TGParser::ParseBodyItem(Record *CurRec) { } const Init *Val = ParseValue(CurRec, Type); - if (!Val) return true; + if (!Val) + return true; if (!consume(tgtok::semi)) return TokError("expected ';' after let expression"); @@ -3677,7 +3750,8 @@ bool TGParser::ParseObjectBody(Record *CurRec) { SubClassReference SubClass = ParseSubClassReference(CurRec, false); while (true) { // Check for error. - if (!SubClass.Rec) return true; + if (!SubClass.Rec) + return true; // Add it. if (AddSubClass(CurRec, SubClass)) @@ -3705,7 +3779,7 @@ bool TGParser::ParseObjectBody(Record *CurRec) { bool TGParser::ParseDef(MultiClass *CurMultiClass) { SMLoc DefLoc = Lex.getLoc(); assert(Lex.getCode() == tgtok::Def && "Unknown tok"); - Lex.Lex(); // Eat the 'def' token. + Lex.Lex(); // Eat the 'def' token. // If the name of the def is an Id token, use that for the location. // Otherwise, the name is more complex and we use the location of the 'def' @@ -3867,7 +3941,7 @@ bool TGParser::ParseDefvar(Record *CurRec) { bool TGParser::ParseForeach(MultiClass *CurMultiClass) { SMLoc Loc = Lex.getLoc(); assert(Lex.getCode() == tgtok::Foreach && "Unknown tok"); - Lex.Lex(); // Eat the 'for' token. + Lex.Lex(); // Eat the 'for' token. // Make a temporary object to record items associated with the for // loop. @@ -3892,7 +3966,7 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) { } else { SMLoc BraceLoc = Lex.getLoc(); // Otherwise, this is a group foreach. - Lex.Lex(); // eat the '{'. + Lex.Lex(); // eat the '{'. // Parse the object list. 
if (ParseObjectList(CurMultiClass)) @@ -4119,7 +4193,7 @@ void TGParser::ParseLetList(SmallVectorImpl<LetRecord> &Result) { const StringInit *Name = StringInit::get(Records, Lex.getCurStrVal()); SMLoc NameLoc = Lex.getLoc(); - Lex.Lex(); // Eat the identifier. + Lex.Lex(); // Eat the identifier. // Check for an optional RangeList. SmallVector<unsigned, 16> Bits; @@ -4159,7 +4233,8 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) { // Add this entry to the let stack. SmallVector<LetRecord, 8> LetInfo; ParseLetList(LetInfo); - if (LetInfo.empty()) return true; + if (LetInfo.empty()) + return true; LetStack.push_back(std::move(LetInfo)); if (!consume(tgtok::In)) @@ -4170,10 +4245,10 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) { // LET LetList IN Object if (ParseObject(CurMultiClass)) return true; - } else { // Object ::= LETCommand '{' ObjectList '}' + } else { // Object ::= LETCommand '{' ObjectList '}' SMLoc BraceLoc = Lex.getLoc(); // Otherwise, this is a group let. - Lex.Lex(); // eat the '{'. + Lex.Lex(); // eat the '{'. // A group let introduces a new scope for local variables. TGVarScope *LetScope = PushScope(); @@ -4210,7 +4285,7 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) { /// bool TGParser::ParseMultiClass() { assert(Lex.getCode() == tgtok::MultiClass && "Unexpected token"); - Lex.Lex(); // Eat the multiclass token. + Lex.Lex(); // Eat the multiclass token. if (Lex.getCode() != tgtok::Id) return TokError("expected identifier after multiclass for name"); @@ -4223,7 +4298,7 @@ bool TGParser::ParseMultiClass() { return TokError("multiclass '" + Name + "' already defined"); CurMultiClass = Result.first->second.get(); - Lex.Lex(); // Eat the identifier. + Lex.Lex(); // Eat the identifier. // A multiclass body introduces a new scope for local variables. TGVarScope *MulticlassScope = PushScope(CurMultiClass); @@ -4241,10 +4316,11 @@ bool TGParser::ParseMultiClass() { // Read all of the submulticlasses. SubMultiClassReference SubMultiClass = - ParseSubMultiClassReference(CurMultiClass); + ParseSubMultiClassReference(CurMultiClass); while (true) { // Check for error. - if (!SubMultiClass.MC) return true; + if (!SubMultiClass.MC) + return true; // Add it. if (AddSubMultiClass(CurMultiClass, SubMultiClass)) @@ -4262,7 +4338,7 @@ bool TGParser::ParseMultiClass() { if (!consume(tgtok::semi)) return TokError("expected ';' in multiclass definition"); } else { - if (Lex.Lex() == tgtok::r_brace) // eat the '{'. + if (Lex.Lex() == tgtok::r_brace) // eat the '{'. return TokError("multiclass must contain at least one def"); while (Lex.getCode() != tgtok::r_brace) { @@ -4284,7 +4360,7 @@ bool TGParser::ParseMultiClass() { break; } } - Lex.Lex(); // eat the '}'. + Lex.Lex(); // eat the '}'. // If we have a semicolon, print a gentle error. SMLoc SemiLoc = Lex.getLoc(); @@ -4338,7 +4414,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { SubClassReference Ref = ParseSubClassReference(nullptr, true); while (true) { - if (!Ref.Rec) return true; + if (!Ref.Rec) + return true; // To instantiate a multiclass, we get the multiclass and then loop // through its template argument names. Substs contains a substitution @@ -4380,7 +4457,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { SubClassReference SubClass = ParseSubClassReference(nullptr, false); while (true) { // Check for error. 
- if (!SubClass.Rec) return true; + if (!SubClass.Rec) + return true; // Get the expanded definition prototypes and teach them about // the record values the current class to inherit has @@ -4426,17 +4504,24 @@ bool TGParser::ParseObject(MultiClass *MC) { default: return TokError( "Expected assert, class, def, defm, defset, dump, foreach, if, or let"); - case tgtok::Assert: return ParseAssert(MC); - case tgtok::Def: return ParseDef(MC); - case tgtok::Defm: return ParseDefm(MC); + case tgtok::Assert: + return ParseAssert(MC); + case tgtok::Def: + return ParseDef(MC); + case tgtok::Defm: + return ParseDefm(MC); case tgtok::Deftype: return ParseDeftype(); - case tgtok::Defvar: return ParseDefvar(); + case tgtok::Defvar: + return ParseDefvar(); case tgtok::Dump: return ParseDump(MC); - case tgtok::Foreach: return ParseForeach(MC); - case tgtok::If: return ParseIf(MC); - case tgtok::Let: return ParseTopLevelLet(MC); + case tgtok::Foreach: + return ParseForeach(MC); + case tgtok::If: + return ParseIf(MC); + case tgtok::Let: + return ParseTopLevelLet(MC); case tgtok::Defset: if (MC) return TokError("defset is not allowed inside multiclass"); diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h index 2a5a192..7edb6c7 100644 --- a/llvm/lib/TableGen/TGParser.h +++ b/llvm/lib/TableGen/TGParser.h @@ -167,9 +167,9 @@ class TGParser { // in the middle of creating in. For those situations, allow the // parser to ignore missing object errors. enum IDParseMode { - ParseValueMode, // We are parsing a value we expect to look up. - ParseNameMode, // We are parsing a name of an object that does not yet - // exist. + ParseValueMode, // We are parsing a value we expect to look up. + ParseNameMode, // We are parsing a name of an object that does not yet + // exist. }; bool NoWarnOnUnusedTemplateArgs = false; @@ -191,9 +191,7 @@ public: PrintError(L, Msg); return true; } - bool TokError(const Twine &Msg) const { - return Error(Lex.getLoc(), Msg); - } + bool TokError(const Twine &Msg) const { return Error(Lex.getLoc(), Msg); } const TGLexer::DependenciesSetTy &getDependencies() const { return Lex.getDependencies(); } @@ -257,7 +255,7 @@ private: // Semantic analysis methods. ArrayRef<const ArgumentInit *> ArgValues, const Init *DefmName, SMLoc Loc); -private: // Parser methods. +private: // Parser methods. bool consume(tgtok::TokKind K); bool ParseObjectList(MultiClass *MC = nullptr); bool ParseObject(MultiClass *MC); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2b6ea86..bad7ccd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11390,13 +11390,18 @@ SDValue AArch64TargetLowering::LowerSELECT_CC( // select_cc lhs, rhs, sub(rhs, lhs), sub(lhs, rhs), cc -> // select_cc lhs, rhs, neg(sub(lhs, rhs)), sub(lhs, rhs), cc // The second forms can be matched into subs+cneg. + // NOTE: Drop poison generating flags from the negated operand to avoid + // inadvertently propagating poison after the canonicalisation. 
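A concrete case showing why the flags must go (illustrative i32 values, not from the patch): with lhs = INT32_MAX and rhs = -1, sub nsw (lhs, rhs) overflows and is poison, while sub (rhs, lhs) == INT32_MIN is well defined. Before the rewrite, the poison could only reach the arm that actually used it; once FVal is rebuilt as neg(TVal), both arms derive from TVal, so keeping nsw would turn a previously well-defined select result into poison.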
if (TVal.getOpcode() == ISD::SUB && FVal.getOpcode() == ISD::SUB) { if (TVal.getOperand(0) == LHS && TVal.getOperand(1) == RHS && - FVal.getOperand(0) == RHS && FVal.getOperand(1) == LHS) + FVal.getOperand(0) == RHS && FVal.getOperand(1) == LHS) { + TVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags); FVal = DAG.getNegative(TVal, DL, TVal.getValueType()); - else if (TVal.getOperand(0) == RHS && TVal.getOperand(1) == LHS && - FVal.getOperand(0) == LHS && FVal.getOperand(1) == RHS) + } else if (TVal.getOperand(0) == RHS && TVal.getOperand(1) == LHS && + FVal.getOperand(0) == LHS && FVal.getOperand(1) == RHS) { + FVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags); TVal = DAG.getNegative(FVal, DL, FVal.getValueType()); + } } unsigned Opcode = AArch64ISD::CSEL; @@ -16284,9 +16289,8 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, Chain = SP.getValue(1); SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size); if (Align) - SP = - DAG.getNode(ISD::AND, DL, VT, SP.getValue(0), - DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT)); + SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0), + DAG.getSignedConstant(-Align->value(), DL, VT)); Chain = DAG.getCopyToReg(Chain, DL, AArch64::SP, SP); SDValue Ops[2] = {SP, Chain}; return DAG.getMergeValues(Ops, DL); @@ -16323,7 +16327,7 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size); if (Align) SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0), - DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT)); + DAG.getSignedConstant(-Align->value(), DL, VT)); Chain = DAG.getCopyToReg(Chain, DL, AArch64::SP, SP); Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL); @@ -16351,7 +16355,7 @@ AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size); if (Align) SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0), - DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT)); + DAG.getSignedConstant(-Align->value(), DL, VT)); // Set the real SP to the new value with a probing loop. Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, DL, MVT::Other, Chain, SP); @@ -25450,6 +25454,29 @@ static SDValue performCSELCombine(SDNode *N, } } + // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't + // use overflow flags, to avoid the comparison with zero. In case of success, + // this also replaces the original SUB(x,y) with the newly created SUBS(x,y). + // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB + // nodes with their SUBS equivalent as is already done for other flag-setting + // operators, in which case doing the replacement here becomes redundant. 
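// (Editorial aside, not part of the patch: EQ/NE read only the Z flag and MI/PL read only the N flag; the Z and N results of SUBS(x, y) match those of SUBS(SUB(x, y), 0), whereas C and V can differ, which is why condition codes such as GE or HI are excluded from this fold.)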
+ if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) && + isNullConstant(Cond.getOperand(1))) { + SDValue Sub = Cond.getOperand(0); + AArch64CC::CondCode CC = + static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2)); + if (Sub.getOpcode() == ISD::SUB && + (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI || + CC == AArch64CC::PL)) { + SDLoc DL(N); + SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), + Sub.getOperand(0), Sub.getOperand(1)); + DCI.CombineTo(Sub.getNode(), Subs); + DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1)); + return SDValue(N, 0); + } + } + // CSEL (LASTB P, Z), X, NE(ANY P) -> CLASTB P, X, Z if (SDValue CondLast = foldCSELofLASTB(N, DAG)) return CondLast; @@ -28609,14 +28636,16 @@ Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { void AArch64TargetLowering::insertSSPDeclarations(Module &M) const { // MSVC CRT provides functionalities for stack protection. - if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) { + RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookieLibcall != RTLIB::Unsupported) { // MSVC CRT has a global variable holding security cookie. M.getOrInsertGlobal("__security_cookie", PointerType::getUnqual(M.getContext())); // MSVC CRT has a function to validate security cookie. FunctionCallee SecurityCheckCookie = - M.getOrInsertFunction(Subtarget->getSecurityCheckCookieName(), + M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall), Type::getVoidTy(M.getContext()), PointerType::getUnqual(M.getContext())); if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) { @@ -28637,8 +28666,10 @@ Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const { Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const { // MSVC CRT has a function to validate security cookie. 
- if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) - return M.getFunction(Subtarget->getSecurityCheckCookieName()); + RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookieLibcall != RTLIB::Unsupported) + return M.getFunction(getLibcallImplName(SecurityCheckCookieLibcall)); return TargetLowering::getSSPStackGuardCheck(M); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 5a537f2..d068a12 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -12564,7 +12564,7 @@ multiclass STOPregister<string asm, string instr> { let Predicates = [HasLSUI] in class BaseSTOPregisterLSUI<string asm, RegisterClass OP, Register Reg, Instruction inst> : - InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn), 0>; + InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>; multiclass STOPregisterLSUI<string asm, string instr> { def : BaseSTOPregisterLSUI<asm # "l", GPR32, WZR, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 59d4fd2..fb59c9f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5861,33 +5861,41 @@ void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( } } -// Convenience function to create a DWARF expression for -// Expr + NumBytes + NumVGScaledBytes * AArch64::VG -static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes, - int NumVGScaledBytes, unsigned VG, - llvm::raw_string_ostream &Comment) { - uint8_t buffer[16]; - - if (NumBytes) { +// Convenience function to create a DWARF expression for: Constant `Operation`. +// This helper emits compact sequences for common cases. For example, for `-15 +// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus. +static void appendConstantExpr(SmallVectorImpl<char> &Expr, int64_t Constant, + dwarf::LocationAtom Operation) { + if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) { + // -Constant (1 to 31) + Expr.push_back(dwarf::DW_OP_lit0 - Constant); + Operation = dwarf::DW_OP_minus; + } else if (Constant >= 0 && Constant <= 31) { + // Literal value 0 to 31 + Expr.push_back(dwarf::DW_OP_lit0 + Constant); + } else { + // Signed constant Expr.push_back(dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer)); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); - Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes); + appendLEB128<LEB128Sign::Signed>(Expr, Constant); } + return Expr.push_back(Operation); +} - if (NumVGScaledBytes) { - Expr.push_back((uint8_t)dwarf::DW_OP_consts); - Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer)); - - Expr.push_back((uint8_t)dwarf::DW_OP_bregx); - Expr.append(buffer, buffer + encodeULEB128(VG, buffer)); - Expr.push_back(0); - - Expr.push_back((uint8_t)dwarf::DW_OP_mul); - Expr.push_back((uint8_t)dwarf::DW_OP_plus); +// Convenience function to create a DWARF expression for a register. +static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) { + Expr.push_back((char)dwarf::DW_OP_bregx); + appendLEB128<LEB128Sign::Unsigned>(Expr, RegNum); + Expr.push_back(0); +} - Comment << (NumVGScaledBytes < 0 ? " - " : " + ") - << std::abs(NumVGScaledBytes) << " * VG"; +// Convenience function to create a comment for +// (+/-) NumBytes (* RegScale)?
+static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, + StringRef RegScale = {}) { + if (NumBytes) { + Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes); + if (!RegScale.empty()) + Comment << ' ' << RegScale; } } @@ -5909,19 +5917,26 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, else Comment << printReg(Reg, &TRI); - // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG) + // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes) SmallString<64> Expr; unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); - Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); - Expr.push_back(0); - appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes, - TRI.getDwarfRegNum(AArch64::VG, true), Comment); + assert(DwarfReg >= 0 && DwarfReg <= 31 && "DwarfReg out of bounds (0..31)"); + // Reg + NumBytes + Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg); + appendLEB128<LEB128Sign::Signed>(Expr, NumBytes); + appendOffsetComment(NumBytes, Comment); + if (NumVGScaledBytes) { + // + VG * NumVGScaledBytes + appendOffsetComment(NumVGScaledBytes, Comment, "* VG"); + appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true)); + appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul); + Expr.push_back(dwarf::DW_OP_plus); + } // Wrap this into DW_CFA_def_cfa. SmallString<64> DefCfaExpr; DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); - uint8_t buffer[16]; - DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer)); + appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size()); DefCfaExpr.append(Expr.str()); return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(), Comment.str()); @@ -5958,17 +5973,25 @@ MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI, llvm::raw_string_ostream Comment(CommentBuffer); Comment << printReg(Reg, &TRI) << " @ cfa"; - // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG) + // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes) + assert(NumVGScaledBytes && "Expected scalable offset"); SmallString<64> OffsetExpr; - appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes, - TRI.getDwarfRegNum(AArch64::VG, true), Comment); + // + VG * NumVGScaledBytes + appendOffsetComment(NumVGScaledBytes, Comment, "* VG"); + appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true)); + appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul); + OffsetExpr.push_back(dwarf::DW_OP_plus); + if (NumBytes) { + // + NumBytes + appendOffsetComment(NumBytes, Comment); + appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus); + } // Wrap this into DW_CFA_expression SmallString<64> CfaExpr; CfaExpr.push_back(dwarf::DW_CFA_expression); - uint8_t buffer[16]; - CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer)); - CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer)); + appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg); + appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size()); CfaExpr.append(OffsetExpr.str()); return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(), diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 061ed61..d00e447 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -451,12 +451,6 @@ public: return "__chkstk"; } - const char* getSecurityCheckCookieName() const { - if (isWindowsArm64EC()) - return 
"#__security_check_cookie_arm64ec"; - return "__security_check_cookie"; - } - /// Choose a method of checking LR before performing a tail call. AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index d905692..f359731 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1697,7 +1697,7 @@ bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, Pred); AArch64CC::CondCode CC1, CC2; - changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); + changeFCMPPredToAArch64CC(Pred, CC1, CC2); MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); if (CC2 != AArch64CC::AL) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 6681393..2a324e5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -486,12 +486,16 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { // Pad with s_code_end to help tools and guard against instruction prefetch // causing stale data in caches. Arguably this should be done by the linker, // which is why this isn't done for Mesa. + // Don't do it if there is no code. const MCSubtargetInfo &STI = *getGlobalSTI(); if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) && (STI.getTargetTriple().getOS() == Triple::AMDHSA || STI.getTargetTriple().getOS() == Triple::AMDPAL)) { - OutStreamer->switchSection(getObjFileLowering().getTextSection()); - getTargetStreamer()->EmitCodeEnd(STI); + MCSection *TextSect = getObjFileLowering().getTextSection(); + if (TextSect->hasInstructions()) { + OutStreamer->switchSection(TextSect); + getTargetStreamer()->EmitCodeEnd(STI); + } } // Assign expressions which can only be resolved when all other functions are diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7771f9b..64e68ab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -367,18 +367,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand); setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand); - setTruncStoreAction(MVT::v5i32, MVT::v5i1, Expand); - setTruncStoreAction(MVT::v5i32, MVT::v5i8, Expand); - setTruncStoreAction(MVT::v5i32, MVT::v5i16, Expand); - - setTruncStoreAction(MVT::v6i32, MVT::v6i1, Expand); - setTruncStoreAction(MVT::v6i32, MVT::v6i8, Expand); - setTruncStoreAction(MVT::v6i32, MVT::v6i16, Expand); - - setTruncStoreAction(MVT::v7i32, MVT::v7i1, Expand); - setTruncStoreAction(MVT::v7i32, MVT::v7i8, Expand); - setTruncStoreAction(MVT::v7i32, MVT::v7i16, Expand); - setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand); setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand); setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1fdf272..a6e4a63 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2271,6 +2271,9 @@ Register AMDGPULegalizerInfo::getSegmentAperture( const unsigned 
ApertureRegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : AMDGPU::SRC_PRIVATE_BASE; + assert((ApertureRegNo != AMDGPU::SRC_PRIVATE_BASE || + !ST.hasGloballyAddressableScratch()) && + "Cannot use src_private_base with globally addressable scratch!"); // FIXME: It would be more natural to emit a COPY here, but then copy // coalescing would kick in and it would think it's okay to use the "HI" // subregister (instead of extracting the HI 32 bits) which is an artificial @@ -2396,11 +2399,30 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( if (SrcAS == AMDGPUAS::FLAT_ADDRESS && (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS)) { + auto castFlatToLocalOrPrivate = [&](const DstOp &Dst) -> Register { + if (DestAS == AMDGPUAS::PRIVATE_ADDRESS && + ST.hasGloballyAddressableScratch()) { + // flat -> private with globally addressable scratch: subtract + // src_flat_scratch_base_lo. + const LLT S32 = LLT::scalar(32); + Register SrcLo = B.buildExtract(S32, Src, 0).getReg(0); + Register FlatScratchBaseLo = + B.buildInstr(AMDGPU::S_MOV_B32, {S32}, + {Register(AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)}) + .getReg(0); + MRI.setRegClass(FlatScratchBaseLo, &AMDGPU::SReg_32RegClass); + Register Sub = B.buildSub(S32, SrcLo, FlatScratchBaseLo).getReg(0); + return B.buildIntToPtr(Dst, Sub).getReg(0); + } + + // Extract low 32-bits of the pointer. + return B.buildExtract(Dst, Src, 0).getReg(0); + }; + // For llvm.amdgcn.addrspacecast.nonnull we can always assume non-null, for // G_ADDRSPACE_CAST we need to guess. if (isa<GIntrinsic>(MI) || isKnownNonNull(Src, MRI, TM, SrcAS)) { - // Extract low 32-bits of the pointer. - B.buildExtract(Dst, Src, 0); + castFlatToLocalOrPrivate(Dst); MI.eraseFromParent(); return true; } @@ -2411,7 +2433,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( auto FlatNull = B.buildConstant(SrcTy, 0); // Extract low 32-bits of the pointer. - auto PtrLo32 = B.buildExtract(DstTy, Src, 0); + auto PtrLo32 = castFlatToLocalOrPrivate(DstTy); auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, FlatNull.getReg(0)); @@ -2425,14 +2447,45 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS)) { auto castLocalOrPrivateToFlat = [&](const DstOp &Dst) -> Register { - Register ApertureReg = getSegmentAperture(SrcAS, MRI, B); - if (!ApertureReg.isValid()) - return false; - // Coerce the type of the low half of the result so we can use // merge_values. Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0); + if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS && + ST.hasGloballyAddressableScratch()) { + // For wave32: Addr = (TID[4:0] << 52) + FLAT_SCRATCH_BASE + privateAddr + // For wave64: Addr = (TID[5:0] << 51) + FLAT_SCRATCH_BASE + privateAddr + Register AllOnes = B.buildConstant(S32, -1).getReg(0); + Register ThreadID = B.buildConstant(S32, 0).getReg(0); + ThreadID = B.buildIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {S32}) + .addUse(AllOnes) + .addUse(ThreadID) + .getReg(0); + if (ST.isWave64()) { + ThreadID = B.buildIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {S32}) + .addUse(AllOnes) + .addUse(ThreadID) + .getReg(0); + } + Register ShAmt = + B.buildConstant(S32, 57 - 32 - ST.getWavefrontSizeLog2()).getReg(0); + Register SrcHi = B.buildShl(S32, ThreadID, ShAmt).getReg(0); + Register CvtPtr = + B.buildMergeLikeInstr(DstTy, {SrcAsInt, SrcHi}).getReg(0); + // Accessing src_flat_scratch_base_lo as a 64-bit operand gives the full + // 64-bit hi:lo value. 
+ Register FlatScratchBase = + B.buildInstr(AMDGPU::S_MOV_B64, {S64}, + {Register(AMDGPU::SRC_FLAT_SCRATCH_BASE)}) + .getReg(0); + MRI.setRegClass(FlatScratchBase, &AMDGPU::SReg_64RegClass); + return B.buildPtrAdd(Dst, CvtPtr, FlatScratchBase).getReg(0); + } + + Register ApertureReg = getSegmentAperture(SrcAS, MRI, B); + if (!ApertureReg.isValid()) + return false; + // TODO: Should we allow mismatched types but matching sizes in merges to // avoid the ptrtoint? return B.buildMergeLikeInstr(Dst, {SrcAsInt, ApertureReg}).getReg(0); @@ -5788,11 +5841,25 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const { - Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); - auto Unmerge = B.buildUnmerge(LLT::scalar(32), MI.getOperand(2).getReg()); + const LLT S32 = LLT::scalar(32); + auto Unmerge = B.buildUnmerge(S32, MI.getOperand(2).getReg()); Register Hi32 = Unmerge.getReg(1); - B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); + if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS && + ST.hasGloballyAddressableScratch()) { + Register FlatScratchBaseHi = + B.buildInstr(AMDGPU::S_MOV_B32, {S32}, + {Register(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)}) + .getReg(0); + MRI.setRegClass(FlatScratchBaseHi, &AMDGPU::SReg_32RegClass); + // Test bits 63..58 against the aperture address. + Register XOR = B.buildXor(S32, Hi32, FlatScratchBaseHi).getReg(0); + B.buildICmp(ICmpInst::ICMP_ULT, MI.getOperand(0), XOR, + B.buildConstant(S32, 1u << 26)); + } else { + Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); + B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); + } MI.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4d67e4a..63826b7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2098,10 +2098,17 @@ bool SITargetLowering::isNonGlobalAddrSpace(unsigned AS) { bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - // Flat -> private/local is a simple truncate. - // Flat -> global is no-op - if (SrcAS == AMDGPUAS::FLAT_ADDRESS) + if (SrcAS == AMDGPUAS::FLAT_ADDRESS) { + if (DestAS == AMDGPUAS::PRIVATE_ADDRESS && + Subtarget->hasGloballyAddressableScratch()) { + // Flat -> private requires subtracting src_flat_scratch_base_lo. + return false; + } + + // Flat -> private/local is a simple truncate. + // Flat -> global is no-op return true; + } const GCNTargetMachine &TM = static_cast<const GCNTargetMachine &>(getTargetMachine()); @@ -7650,6 +7657,9 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, const unsigned ApertureRegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : AMDGPU::SRC_PRIVATE_BASE; + assert((ApertureRegNo != AMDGPU::SRC_PRIVATE_BASE || + !Subtarget->hasGloballyAddressableScratch()) && + "Cannot use src_private_base with globally addressable scratch!"); // Note: this feature (register) is broken. When used as a 32-bit operand, // it returns a wrong value (all zeroes?). The real value is in the upper 32 // bits. 
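// (Editorial sketch, not part of the patch: a minimal check of where the
// thread ID lands in a flat address under globally addressable scratch. The
// shift amount 57 - 32 - WavefrontSizeLog2 used above places TID bit 0 at
// address bit 52 for wave32 and bit 51 for wave64, matching the
// "TID[4:0] << 52" / "TID[5:0] << 51" comments.)
#include <cassert>
#include <cstdint>
static uint64_t flatHiBits(uint32_t Tid, unsigned WaveLog2) {
  unsigned ShAmt = 57 - 32 - WaveLog2;   // shift within the high 32 bits
  return (uint64_t)(Tid << ShAmt) << 32; // reattach as the address high word
}
int main() {
  assert(flatHiBits(1, 5) == (1ULL << 52)); // wave32
  assert(flatHiBits(1, 6) == (1ULL << 51)); // wave64
}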
@@ -7760,6 +7770,18 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, DestAS == AMDGPUAS::PRIVATE_ADDRESS) { SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src); + if (DestAS == AMDGPUAS::PRIVATE_ADDRESS && + Subtarget->hasGloballyAddressableScratch()) { + // flat -> private with globally addressable scratch: subtract + // src_flat_scratch_base_lo. + SDValue FlatScratchBaseLo( + DAG.getMachineNode( + AMDGPU::S_MOV_B32, SL, MVT::i32, + DAG.getRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_LO, MVT::i32)), + 0); + Ptr = DAG.getNode(ISD::SUB, SL, MVT::i32, Ptr, FlatScratchBaseLo); + } + if (IsNonNull || isKnownNonNull(Op, DAG, TM, SrcAS)) return Ptr; @@ -7776,11 +7798,40 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, if (DestAS == AMDGPUAS::FLAT_ADDRESS) { if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) { - - SDValue Aperture = getSegmentAperture(SrcAS, SL, DAG); - SDValue CvtPtr = - DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture); - CvtPtr = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr); + SDValue CvtPtr; + if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS && + Subtarget->hasGloballyAddressableScratch()) { + // For wave32: Addr = (TID[4:0] << 52) + FLAT_SCRATCH_BASE + privateAddr + // For wave64: Addr = (TID[5:0] << 51) + FLAT_SCRATCH_BASE + privateAddr + SDValue AllOnes = DAG.getSignedTargetConstant(-1, SL, MVT::i32); + SDValue ThreadID = DAG.getConstant(0, SL, MVT::i32); + ThreadID = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32, + DAG.getTargetConstant(Intrinsic::amdgcn_mbcnt_lo, SL, MVT::i32), + AllOnes, ThreadID); + if (Subtarget->isWave64()) + ThreadID = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32, + DAG.getTargetConstant(Intrinsic::amdgcn_mbcnt_hi, SL, MVT::i32), + AllOnes, ThreadID); + SDValue ShAmt = DAG.getShiftAmountConstant( + 57 - 32 - Subtarget->getWavefrontSizeLog2(), MVT::i32, SL); + SDValue SrcHi = DAG.getNode(ISD::SHL, SL, MVT::i32, ThreadID, ShAmt); + CvtPtr = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, SrcHi); + CvtPtr = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr); + // Accessing src_flat_scratch_base_lo as a 64-bit operand gives the full + // 64-bit hi:lo value. + SDValue FlatScratchBase = { + DAG.getMachineNode( + AMDGPU::S_MOV_B64, SL, MVT::i64, + DAG.getRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE, MVT::i64)), + 0}; + CvtPtr = DAG.getNode(ISD::ADD, SL, MVT::i64, CvtPtr, FlatScratchBase); + } else { + SDValue Aperture = getSegmentAperture(SrcAS, SL, DAG); + CvtPtr = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture); + CvtPtr = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr); + } if (IsNonNull || isKnownNonNull(Op, DAG, TM, SrcAS)) return CvtPtr; @@ -9424,15 +9475,29 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::amdgcn_is_shared: case Intrinsic::amdgcn_is_private: { SDLoc SL(Op); - unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) - ? AMDGPUAS::LOCAL_ADDRESS - : AMDGPUAS::PRIVATE_ADDRESS; - SDValue Aperture = getSegmentAperture(AS, SL, DAG); SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1)); - SDValue SrcHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, SrcVec, DAG.getConstant(1, SL, MVT::i32)); + + unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) + ? 
AMDGPUAS::LOCAL_ADDRESS + : AMDGPUAS::PRIVATE_ADDRESS; + if (AS == AMDGPUAS::PRIVATE_ADDRESS && + Subtarget->hasGloballyAddressableScratch()) { + SDValue FlatScratchBaseHi( + DAG.getMachineNode( + AMDGPU::S_MOV_B32, DL, MVT::i32, + DAG.getRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, MVT::i32)), + 0); + // Test bits 63..58 against the aperture address. + return DAG.getSetCC( + SL, MVT::i1, + DAG.getNode(ISD::XOR, SL, MVT::i32, SrcHi, FlatScratchBaseHi), + DAG.getConstant(1u << 26, SL, MVT::i32), ISD::SETULT); + } + + SDValue Aperture = getSegmentAperture(AS, SL, DAG); return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ); } case Intrinsic::amdgcn_perm: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 3f61bbd..5f498a3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6122,10 +6122,11 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, !Op.isIdenticalTo(*MO)) return false; - // Do not fold a frame index into an instruction that already has a frame - // index. The frame index handling code doesn't handle fixing up operand - // constraints if there are multiple indexes. - if (Op.isFI() && MO->isFI()) + // Do not fold a non-inlineable and non-register operand into an + // instruction that already has a frame index. The frame index handling + // code cannot handle a frame index that coexists with another + // non-register operand, unless that operand is an inlineable immediate. + if (Op.isFI()) return false; } } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() && diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index ed6b973..81655f5 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -866,7 +866,8 @@ def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16, v4bf16], def SReg_64_XEXEC_XNULL : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16, v4bf16], 32, (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SRC_SHARED_BASE, - SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA)> { + SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, TTMP_64, TBA, TMA, + SRC_FLAT_SCRATCH_BASE)> { let CopyCost = 1; let AllocationPriority = 1; let HasSGPR = 1; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 7f8b446..ea99cc4 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -737,7 +737,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, const RTLIB::LibcallImpl Impl; } LibraryCalls[] = { {RTLIB::FPROUND_F32_F16, RTLIB::__aeabi_f2h}, - {RTLIB::FPROUND_F64_F16, RTLIB::__aeabi_d2h}, {RTLIB::FPEXT_F16_F32, RTLIB::__aeabi_h2f}, }; @@ -20351,7 +20350,8 @@ static bool isIncompatibleReg(const MCPhysReg &PR, MVT VT) { if (PR == 0 || VT == MVT::Other) return false; return (ARM::SPRRegClass.contains(PR) && VT != MVT::f32 && VT != MVT::i32) || - (ARM::DPRRegClass.contains(PR) && VT != MVT::f64); + (ARM::DPRRegClass.contains(PR) && VT != MVT::f64 && + !VT.is64BitVector()); } using RCPair = std::pair<unsigned, const TargetRegisterClass *>; @@ -20784,9 +20784,8 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const Chain = SP.getValue(1); SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size); if (Align) - SP = DAG.getNode( - ISD::AND, DL, MVT::i32, SP.getValue(0), - 
DAG.getSignedConstant(-(uint64_t)Align->value(), DL, MVT::i32)); + SP = DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0), + DAG.getSignedConstant(-Align->value(), DL, MVT::i32)); Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP); SDValue Ops[2] = { SP, Chain }; return DAG.getMergeValues(Ops, DL); @@ -21359,7 +21358,9 @@ bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const { } void ARMTargetLowering::insertSSPDeclarations(Module &M) const { - if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) + RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookieLibcall == RTLIB::Unsupported) return TargetLowering::insertSSPDeclarations(M); // MSVC CRT has a global variable holding security cookie. @@ -21368,23 +21369,32 @@ void ARMTargetLowering::insertSSPDeclarations(Module &M) const { // MSVC CRT has a function to validate security cookie. FunctionCallee SecurityCheckCookie = M.getOrInsertFunction( - "__security_check_cookie", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(M.getContext())); + getLibcallImplName(SecurityCheckCookieLibcall), + Type::getVoidTy(M.getContext()), PointerType::getUnqual(M.getContext())); if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) F->addParamAttr(0, Attribute::AttrKind::InReg); } Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const { - // MSVC CRT has a global variable holding security cookie. - if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) + RTLIB::LibcallImpl SecurityCheckCookieLibcall = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookieLibcall != RTLIB::Unsupported) { + // MSVC CRT has a global variable holding security cookie. + // + // FIXME: We have a libcall entry for the correlated check function, but not + // the global name. return M.getGlobalVariable("__security_cookie"); + } + return TargetLowering::getSDagStackGuard(M); } Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const { // MSVC CRT has a function to validate security cookie. 
- if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) - return M.getFunction("__security_check_cookie"); + RTLIB::LibcallImpl SecurityCheckCookie = + getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE); + if (SecurityCheckCookie != RTLIB::Unsupported) + return M.getFunction(getLibcallImplName(SecurityCheckCookie)); return TargetLowering::getSSPStackGuardCheck(M); } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 5096a8f..d8bb16f 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1651,20 +1651,19 @@ def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm), (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>; def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm), (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>; -def : Pat<(vector_insert v8f32:$xd, (loongarch_movgr2fr_w_la64 GPR:$rj), uimm3:$imm), - (XVINSGR2VR_W $xd, $rj, uimm3:$imm)>; -def : Pat<(vector_insert v4f64:$xd, (f64 (bitconvert i64:$rj)), uimm2:$imm), - (XVINSGR2VR_D $xd, $rj, uimm2:$imm)>; -def : Pat<(vector_insert v8f32:$xd, (f32 (vector_extract v8f32:$xj, uimm3:$imm1)), uimm3:$imm2), - (XVINSGR2VR_W $xd, (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm1), uimm3:$imm2)>; -def : Pat<(vector_insert v4f64:$xd, (f64 (vector_extract v4f64:$xj, uimm2:$imm1)), uimm2:$imm2), - (XVINSGR2VR_D $xd, (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm1), uimm2:$imm2)>; +def : Pat<(vector_insert v8f32:$xd, (loongarch_movgr2fr_w_la64 GPR:$rj), + uimm3:$imm), + (XVINSGR2VR_W v8f32:$xd, GPR:$rj, uimm3:$imm)>; +def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm), + (XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>; // XVINSVE0_{W/D} def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm), - (XVINSVE0_W $xd, (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), uimm3:$imm)>; + (XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32), + uimm3:$imm)>; def : Pat<(vector_insert v4f64:$xd, FPR64:$fj, uimm2:$imm), - (XVINSVE0_D $xd, (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), uimm2:$imm)>; + (XVINSVE0_D v4f64:$xd, (SUBREG_TO_REG(i64 0), FPR64:$fj, sub_64), + uimm2:$imm)>; // scalar_to_vector def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)), @@ -1884,10 +1883,10 @@ def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)), (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>; def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)), (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>; -def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)), - (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>; -def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)), - (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>; +def : Pat<(f32(vector_extract v8f32:$xj, uimm3:$imm)), + (EXTRACT_SUBREG(XVPICKVE_W v8f32:$xj, uimm3:$imm), sub_32)>; +def : Pat<(f64(vector_extract v4f64:$xj, uimm2:$imm)), + (EXTRACT_SUBREG(XVPICKVE_D v4f64:$xj, uimm2:$imm), sub_64)>; // vselect def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)), diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 6765ecb..aac611d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1560,18 +1560,6 @@ def : Pat<(setcc (i16 (sext_inreg (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel (PRMT_B32rii i32:$b, 0, (to_sign_extend_selector $sel_b), PrmtNONE), (cond2cc $cc))>; -// A 16-bit comparison of truncated byte extracts can be be converted to 32-bit -// comparison because we 
know that the truncate is just trancating off zeros -// and that the most-significant byte is also zeros so the meaning of signed and -// unsigned comparisons will not be changed. -def : Pat<(setcc (i16 (trunc (prmt i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE))), - (i16 (trunc (prmt i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE))), - cond:$cc), - (SETP_i32rr (PRMT_B32rii i32:$a, 0, byte_extract_prmt:$sel_a, PrmtNONE), - (PRMT_B32rii i32:$b, 0, byte_extract_prmt:$sel_b, PrmtNONE), - (cond2cc $cc))>; - - def SDTDeclareArrayParam : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; def SDTDeclareScalarParam : diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 30b5fd6..196574e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3925,9 +3925,6 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - if (Subtarget.isAIXABI()) - report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX."); - return Op.getOperand(0); } @@ -3984,9 +3981,6 @@ SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - if (Subtarget.isAIXABI()) - report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX."); - SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -3994,6 +3988,65 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + if (Subtarget.isAIXABI()) { + // On AIX we create a trampoline descriptor by combining the + // entry point and TOC from the global descriptor (FPtr) with the + // nest argument as the environment pointer. + uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4; + MaybeAlign PointerAlign(PointerSize); + auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() + ? (MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant) + : MachineMemOperand::MONone; + + uint64_t TOCPointerOffset = 1 * PointerSize; + uint64_t EnvPointerOffset = 2 * PointerSize; + SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT); + SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT); + + const Value *TrampolineAddr = + cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + const Function *Func = + cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); + + SDValue OutChains[3]; + + // Copy the entry point address from the global descriptor to the + // trampoline buffer. + SDValue LoadEntryPoint = + DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0), + PointerAlign, MMOFlags); + SDValue EPLoadChain = LoadEntryPoint.getValue(1); + OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp, + MachinePointerInfo(TrampolineAddr, 0)); + + // Copy the TOC pointer from the global descriptor to the trampoline + // buffer. 
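// (Editorial note, not part of the patch: an AIX function descriptor holds
// three pointers -- the entry point at offset 0, the TOC pointer at
// PointerSize, and the environment pointer at 2 * PointerSize -- which is
// why the loads and stores here use SDTOCPtrOffset and SDEnvPtrOffset.)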
+ SDValue TOCFromDescriptorPtr = + DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset); + SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr, + MachinePointerInfo(Func, TOCPointerOffset), + PointerAlign, MMOFlags); + SDValue TrampolineTOCPointer = + DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset); + SDValue TOCLoadChain = TOCReg.getValue(1); + OutChains[1] = + DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer, + MachinePointerInfo(TrampolineAddr, TOCPointerOffset)); + + // Store the nest argument into the environment pointer in the trampoline + // buffer. + SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset); + OutChains[2] = + DAG.getStore(Chain, dl, Nest, EnvPointer, + MachinePointerInfo(TrampolineAddr, EnvPointerOffset)); + + SDValue TokenFactor = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + return TokenFactor; + } + bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); @@ -6865,9 +6918,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (ValVT == MVT::f128) report_fatal_error("f128 is unimplemented on AIX."); - if (ArgFlags.isNest()) - report_fatal_error("Nest arguments are unimplemented."); - static const MCPhysReg GPR_32[] = {// 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10}; @@ -6882,6 +6932,14 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32; + if (ArgFlags.isNest()) { + MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11); + if (!EnvReg) + report_fatal_error("More than one nest argument."); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo)); + return false; + } + if (ArgFlags.isByVal()) { const Align ByValAlign(ArgFlags.getNonZeroByValAlign()); if (ByValAlign > StackAlign) @@ -9593,12 +9651,14 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, return false; } -bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) { +bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, + bool IsLittleEndian) { assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector"); BitMask.clearAllBits(); EVT VT = BVN.getValueType(0); - APInt ConstValue(VT.getSizeInBits(), 0); + unsigned VTSize = VT.getSizeInBits(); + APInt ConstValue(VTSize, 0); unsigned EltWidth = VT.getScalarSizeInBits(); @@ -9608,8 +9668,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) { if (!CN) return false; - - ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); + // The elements in a vector register are ordered in reverse byte order + // between little-endian and big-endian modes. + ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), + IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos); BitPos += EltWidth; } @@ -9640,7 +9702,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // we do not convert it to MTVSRBMI. // The xxleqv instruction sets a vector with all ones. // The xxlxor instruction sets a vector with all zeros. 
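// (Editorial sketch, not part of the patch: the bit position used by the
// isValidMtVsrBmi change above. On little-endian targets element i starts at
// bit i * EltWidth; on big-endian targets the order is mirrored.)
#include <cassert>
static unsigned eltBitPos(unsigned I, unsigned EltWidth, unsigned VTSize,
                          bool IsLittleEndian) {
  unsigned BitPos = I * EltWidth;
  return IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos;
}
int main() {
  // v16i8: element 0 maps to bits [7:0] on LE but bits [127:120] on BE.
  assert(eltBitPos(0, 8, 128, true) == 0);
  assert(eltBitPos(0, 8, 128, false) == 120);
  assert(eltBitPos(15, 8, 128, false) == 0); // last element lands at bits [7:0]
}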
- if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) { + if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) && + BitMask != 0 && BitMask != 0xffff) { SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32); MachineSDNode *MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant); diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 1dc485d..98dd846 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -2175,10 +2175,7 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { // - Other vector types [v16i8, v8i16] require COPY_TO_REGCLASS to/from VRRC // ============================================================================= -class XXEvalPattern<dag pattern, bits<8> imm> - : Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} - -class XXEvalPatterns<ValueType Vt, dag InputPattern, bits<8> Imm> +class XXEvalPattern<ValueType Vt, dag InputPattern, bits<8> Imm> : Pat<(Vt InputPattern), !if(!or(!eq(Vt, v4i32), !eq(Vt, v2i64)), // VSRC path: direct XXEVAL for v4i32 and v2i64 @@ -2246,26 +2243,26 @@ def VEqv // ============================================================================= multiclass XXEvalTernarySelectAnd<ValueType Vt> { // Pattern: A ? XOR(B,C) : AND(B,C) XXEVAL immediate value: 22 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 22>; // Pattern: A ? NOR(B,C) : AND(B,C) XXEVAL immediate value: 24 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 24>; // Pattern: A ? EQV(B,C) : AND(B,C) XXEVAL immediate value: 25 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 25>; // Pattern: A ? NOT(C) : AND(B,C) XXEVAL immediate value: 26 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 26>; // Pattern: A ? 
NOT(B) : AND(B,C) XXEVAL immediate value: 28 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VAnd Vt:$vB, Vt:$vC)), 28>; } @@ -2299,83 +2296,83 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { // Anonymous patterns for XXEVAL // AND // and(A, B, C) - def : XXEvalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; // and(A, xor(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; // and(A, or(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; // and(A, nor(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>; // and(A, eqv(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>; // and(A, nand(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>; // NAND // nand(A, B, C) - def : XXEvalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), !sub(255, 1)>; // nand(A, xor(B, C)) - def : XXEvalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), !sub(255, 6)>; // nand(A, or(B, C)) - def : XXEvalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), !sub(255, 7)>; // nand(A, nor(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), !sub(255, 8)>; // nand(A, eqv(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), !sub(255, 9)>; // nand(A, nand(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), !sub(255, 14)>; // EQV // (eqv A, B, C) - def : XXEvalPattern<(or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))), 150>; // (eqv A, (and B, C)) - def : XXEvalPattern<(vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>; + def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>; // (eqv A, (or B, C)) - def : XXEvalPattern<(vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>; + def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>; // NOR // (nor A, B, C) - def : XXEvalPattern<(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>; + def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>; // (nor A, (and B, C)) - def : XXEvalPattern<(vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>; + def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>; // (nor A, (eqv B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 
96>; + def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>; // (nor A, (nand B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>; + def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>; // (nor A, (nor B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>; + def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>; // (nor A, (xor B, C)) - def : XXEvalPattern<(vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>; + def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>; // OR // (or A, B, C) - def : XXEvalPattern<(or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>; // (or A, (and B, C)) - def : XXEvalPattern<(or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>; // (or A, (eqv B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>; // (or A, (nand B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>; // (or A, (nor B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>; // (or A, (xor B, C)) - def : XXEvalPattern<(or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>; // XOR // (xor A, B, C) - def : XXEvalPattern<(xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>; + def : XXEvalPattern<v4i32, (xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>; // (xor A, (and B, C)) - def : XXEvalPattern<(xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>; + def : XXEvalPattern<v4i32, (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>; // (xor A, (or B, C)) - def : XXEvalPattern<(xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>; + def : XXEvalPattern<v4i32, (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>; // XXEval Patterns for ternary Operations. 
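// (Editorial sketch, not part of the patch: one consistent reading of the
// XXEVAL immediates above is as the truth table of the ternary function, one
// bit per (A,B,C) input combination. The checks below reproduce immediates
// used in this file, e.g. 22 for A ? XOR(B,C) : AND(B,C).)
#include <cassert>
template <typename F> static unsigned xxevalImm(F Fn) {
  unsigned Imm = 0;
  for (unsigned A = 0; A < 2; ++A)
    for (unsigned B = 0; B < 2; ++B)
      for (unsigned C = 0; C < 2; ++C)
        Imm |= Fn(A, B, C) << (7 - (A * 4 + B * 2 + C));
  return Imm;
}
int main() {
  auto Sel = [](unsigned A, unsigned B, unsigned C) { return A ? B ^ C : B & C; };
  auto And3 = [](unsigned A, unsigned B, unsigned C) { return A & B & C; };
  auto Nor3 = [](unsigned A, unsigned B, unsigned C) { return (unsigned)!(A | B | C); };
  assert(xxevalImm(Sel) == 22);   // A ? XOR(B,C) : AND(B,C)
  assert(xxevalImm(And3) == 1);   // and(A, B, C)
  assert(xxevalImm(Nor3) == 128); // nor(A, B, C)
}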
foreach Ty = [v4i32, v2i64, v8i16, v16i8] in { diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 171940e..a7329d2 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1700,6 +1700,18 @@ def TuneNLogNVRGather def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; +def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering", + "EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">; + +def TuneDisableMISchedStoreClustering : SubtargetFeature<"disable-misched-store-clustering", + "EnableMISchedStoreClustering", "false", "Disable store clustering in the machine scheduler">; + +def TuneDisablePostMISchedLoadClustering : SubtargetFeature<"disable-postmisched-load-clustering", + "EnablePostMISchedLoadClustering", "false", "Disable PostRA load clustering in the machine scheduler">; + +def TuneDisablePostMISchedStoreClustering : SubtargetFeature<"disable-postmisched-store-clustering", + "EnablePostMISchedStoreClustering", "false", "Disable PostRA store clustering in the machine scheduler">; + def TuneDisableLatencySchedHeuristic : SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0077ecf..03e54b3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -24691,7 +24691,7 @@ SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size); if (Align) SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), - DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT)); + DAG.getSignedConstant(-Align->value(), dl, VT)); // Set the real SP to the new value with a probing loop. Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index d2a6514..27ad10a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -641,13 +641,15 @@ def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)), let Predicates = [HasStdExtZbkb, IsRV32] in { def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))), (PACK GPR:$rs1, GPR:$rs2)>; -def : Pat<(or (or - (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)), + +// Match a pattern of 2 bytes being inserted into bits [31:16], with +// bits [15:0] coming from a zero-extended value. We can use pack with packh for +// bits [31:16]. If bits [15:0] can also be a packh, it can be matched +// separately. 
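// (Editorial example, not part of the patch: PACKH packs two bytes into a
// half word and PACK concatenates two half words, so the pattern below maps
// $op1rs2 to bits [31:24], $op1rs1 to bits [23:16], and keeps the already
// zero-extended $rs1 in bits [15:0].)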
+def : Pat<(or (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)), (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), - (or - (shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)), - (zexti8 (XLenVT GPR:$op0rs1)))), - (PACK (XLenVT (PACKH GPR:$op0rs1, GPR:$op0rs2)), + (zexti16 (XLenVT GPR:$rs1))), + (PACK (XLenVT GPR:$rs1), (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; } @@ -661,6 +663,27 @@ def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)), def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32), (zexti16 (i64 GPR:$rs1)))), (PACKW GPR:$rs1, GPR:$rs2)>; + +// Match a pattern of 2 bytes being inserted into bits [31:16], with +// bits [15:0] coming from a zero-extended value, and bits [63:32] being +// ignored. We can use packw with packh for bits [31:16]. If bits [15:0] can +// also be a packh, it can be matched separately. +def : Pat<(binop_allwusers<or> + (or (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (zexti16 (XLenVT GPR:$rs1))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; +// We need to manually reassociate the patterns because of the binop_allwusers. +def : Pat<(binop_allwusers<or> + (or (zexti16 (XLenVT GPR:$rs1)), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))), + (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; +def : Pat<(binop_allwusers<or> + (or (zexti16 (XLenVT GPR:$rs1)), + (shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 24))), + (shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 16))), + (PACKW GPR:$rs1, (XLenVT (PACKH GPR:$op1rs1, GPR:$op1rs2)))>; } // Predicates = [HasStdExtZbkb, IsRV64] let Predicates = [HasStdExtZbb, IsRV32] in diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.td b/llvm/lib/Target/RISCV/RISCVMacroFusion.td index 875a93d..39e099b 100644 --- a/llvm/lib/Target/RISCV/RISCVMacroFusion.td +++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.td @@ -91,3 +91,59 @@ def TuneLDADDFusion CheckIsImmOperand<2>, CheckImmOperand<2, 0> ]>>; + +defvar Load = [LB, LH, LW, LD, LBU, LHU, LWU]; + +// Fuse add(.uw) followed by a load (lb, lh, lw, ld, lbu, lhu, lwu): +// add(.uw) rd, rs1, rs2 +// load rd, imm12(rd) +def TuneADDLoadFusion + : SimpleFusion<"add-load-fusion", "HasADDLoadFusion", "Enable ADD(.UW) + load macrofusion", + CheckOpcode<[ADD, ADD_UW]>, + CheckOpcode<Load>>; + +// Fuse AUIPC followed by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// auipc rd, imm20 +// load rd, imm12(rd) +def TuneAUIPCLoadFusion + : SimpleFusion<"auipc-load-fusion", "HasAUIPCLoadFusion", + "Enable AUIPC + load macrofusion", + CheckOpcode<[AUIPC]>, + CheckOpcode<Load>>; + +// Fuse LUI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// lui rd, imm[31:12] +// load rd, imm12(rd) +def TuneLUILoadFusion + : SimpleFusion<"lui-load-fusion", "HasLUILoadFusion", + "Enable LUI + load macrofusion", + CheckOpcode<[LUI]>, + CheckOpcode<Load>>; + +// Bitfield extract fusion: similar to TuneShiftedZExtWFusion +// but without the immediate restriction +// slli rd, rs1, imm12 +// srli rd, rd, imm12 +def TuneBFExtFusion + : SimpleFusion<"bfext-fusion", "HasBFExtFusion", + "Enable SLLI+SRLI (bitfield extract) macrofusion", + CheckOpcode<[SLLI]>, + CheckOpcode<[SRLI]>>; + +// Fuse ADDI followed by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// addi rd, rs1, imm12 +// load rd, imm12(rd) +def TuneADDILoadFusion + : SimpleFusion<"addi-load-fusion", "HasADDILoadFusion", + "Enable ADDI + load macrofusion", + CheckOpcode<[ADDI]>, + CheckOpcode<Load>>; + +// Fuse shXadd(.uw) 
followed by a load (lb, lh, lw, ld, lbu, lhu, lwu) +// shXadd(.uw) rd, rs1, rs2 +// load rd, imm12(rd) +def TuneSHXADDLoadFusion + : SimpleFusion<"shxadd-load-fusion", "HasSHXADDLoadFusion", + "Enable SH(1|2|3)ADD(.UW) + load macrofusion", + CheckOpcode<[SH1ADD, SH2ADD, SH3ADD, SH1ADD_UW, SH2ADD_UW, SH3ADD_UW]>, + CheckOpcode<Load>>; diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 838edf6..31d2b3a 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -590,12 +590,17 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", FeatureStdExtZicboz, FeatureVendorXVentanaCondOps], [TuneVentanaVeyron, + TuneDisableMISchedLoadClustering, + TuneDisablePostMISchedLoadClustering, + TuneDisablePostMISchedStoreClustering, TuneLUIADDIFusion, TuneAUIPCADDIFusion, TuneZExtHFusion, TuneZExtWFusion, TuneShiftedZExtWFusion, - TuneLDADDFusion]> { + TuneADDLoadFusion, + TuneAUIPCLoadFusion, + TuneLUILoadFusion]> { let MVendorID = 0x61f; let MArchID = 0x8000000000010000; let MImpID = 0x111; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 3f2a83f..66ce134 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -94,16 +94,6 @@ static cl::opt<bool> cl::desc("Enable the loop data prefetch pass"), cl::init(true)); -static cl::opt<bool> EnableMISchedLoadStoreClustering( - "riscv-misched-load-store-clustering", cl::Hidden, - cl::desc("Enable load and store clustering in the machine scheduler"), - cl::init(true)); - -static cl::opt<bool> EnablePostMISchedLoadStoreClustering( - "riscv-postmisched-load-store-clustering", cl::Hidden, - cl::desc("Enable PostRA load and store clustering in the machine scheduler"), - cl::init(true)); - static cl::opt<bool> DisableVectorMaskMutation( "riscv-disable-vector-mask-mutation", cl::desc("Disable the vector mask scheduling mutation"), cl::init(false), @@ -294,15 +284,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, ScheduleDAGInstrs * RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const { + const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>(); ScheduleDAGMILive *DAG = createSchedLive(C); - if (EnableMISchedLoadStoreClustering) { + + if (ST.enableMISchedLoadClustering()) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); + + if (ST.enableMISchedStoreClustering()) DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } - const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>(); if (!DisableVectorMaskMutation && ST.hasVInstructions()) DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI)); @@ -311,13 +303,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const { ScheduleDAGInstrs * RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const { + const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>(); ScheduleDAGMI *DAG = createSchedPostRA(C); - if (EnablePostMISchedLoadStoreClustering) { + + if (ST.enablePostMISchedLoadClustering()) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); + + if (ST.enablePostMISchedStoreClustering()) DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } return DAG; } diff --git 
a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h index 78a066b..ed0a1e1 100644 --- a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h +++ b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.h @@ -73,7 +73,11 @@ public: Entry(std::move(CR.Entry)), Exits(std::move(CR.Exits)), Blocks(std::move(CR.Blocks)) {} + ~ConvergenceRegion() { releaseMemory(); } + + ConvergenceRegion &operator=(ConvergenceRegion &&CR) = delete; ConvergenceRegion(const ConvergenceRegion &other) = delete; + ConvergenceRegion &operator=(const ConvergenceRegion &other) = delete; // Returns true if the given basic block belongs to this region, or to one of // its subregion. @@ -101,6 +105,9 @@ public: ~ConvergenceRegionInfo() { releaseMemory(); } + ConvergenceRegionInfo(const ConvergenceRegionInfo &LHS) = delete; + ConvergenceRegionInfo &operator=(const ConvergenceRegionInfo &LHS) = delete; + ConvergenceRegionInfo(ConvergenceRegionInfo &&LHS) : TopLevelRegion(LHS.TopLevelRegion) { if (TopLevelRegion != LHS.TopLevelRegion) {
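The deleted copy and move-assignment operations above pair with the new destructor that calls releaseMemory(): once a type frees state in its destructor, implicitly generated copies would free that state twice. The same rule-of-five discipline in a standalone sketch (an illustrative class, not the SPIRV one):

    #include <utility>

    // An owning wrapper that frees in its destructor. Copying is deleted so
    // two owners can never free the same buffer; ownership can only move.
    class Region {
      int *Data = nullptr;

    public:
      explicit Region(int *D) : Data(D) {}
      ~Region() { delete Data; }

      Region(Region &&Other) : Data(std::exchange(Other.Data, nullptr)) {}
      Region &operator=(Region &&) = delete; // as in ConvergenceRegion above
      Region(const Region &) = delete;
      Region &operator=(const Region &) = delete;
    };

diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt index ba09451..6660de9 100644 --- a/llvm/lib/Target/SPIRV/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_target(SPIRVCodeGen SPIRVGlobalRegistry.cpp SPIRVInstrInfo.cpp SPIRVInstructionSelector.cpp + SPIRVLegalizeImplicitBinding.cpp SPIRVStripConvergentIntrinsics.cpp SPIRVLegalizePointerCast.cpp SPIRVMergeRegionExitTargets.cpp diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp index 64d301e..4ec31bf 100644 --- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp +++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVInstPrinter.cpp @@ -96,7 +96,7 @@ void SPIRVInstPrinter::printOpConstantVarOps(const MCInst *MI, void SPIRVInstPrinter::recordOpExtInstImport(const MCInst *MI) { MCRegister Reg = MI->getOperand(0).getReg(); auto Name = getSPIRVStringOperand(*MI, 1); - auto Set = getExtInstSetFromString(Name); + auto Set = getExtInstSetFromString(std::move(Name)); ExtInstSetIDs.insert({Reg, Set}); } @@ -210,6 +210,7 @@ void SPIRVInstPrinter::printInst(const MCInst *MI, uint64_t Address, case SPIRV::OpConstantF: // The last fixed operand along with any variadic operands that follow // are part of the variable value.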
+ assert(NumFixedOps > 0 && "Expected at least one fixed operand"); printOpConstantVarOps(MI, NumFixedOps - 1, OS); break; case SPIRV::OpCooperativeMatrixMulAddKHR: { diff --git a/llvm/lib/Target/SPIRV/SPIRV.h b/llvm/lib/Target/SPIRV/SPIRV.h index 1688fa3..1934e98 100644 --- a/llvm/lib/Target/SPIRV/SPIRV.h +++ b/llvm/lib/Target/SPIRV/SPIRV.h @@ -23,6 +23,7 @@ ModulePass *createSPIRVPrepareFunctionsPass(const SPIRVTargetMachine &TM); FunctionPass *createSPIRVStructurizerPass(); FunctionPass *createSPIRVMergeRegionExitTargetsPass(); FunctionPass *createSPIRVStripConvergenceIntrinsicsPass(); +ModulePass *createSPIRVLegalizeImplicitBindingPass(); FunctionPass *createSPIRVLegalizePointerCastPass(SPIRVTargetMachine *TM); FunctionPass *createSPIRVRegularizerPass(); FunctionPass *createSPIRVPreLegalizerCombiner(); @@ -49,6 +50,7 @@ void initializeSPIRVRegularizerPass(PassRegistry &); void initializeSPIRVMergeRegionExitTargetsPass(PassRegistry &); void initializeSPIRVPrepareFunctionsPass(PassRegistry &); void initializeSPIRVStripConvergentIntrinsicsPass(PassRegistry &); +void initializeSPIRVLegalizeImplicitBindingPass(PassRegistry &); } // namespace llvm #endif // LLVM_LIB_TARGET_SPIRV_SPIRV_H diff --git a/llvm/lib/Target/SPIRV/SPIRVAPI.cpp b/llvm/lib/Target/SPIRV/SPIRVAPI.cpp index cfe7ef4..d6581b2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAPI.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAPI.cpp @@ -156,7 +156,7 @@ SPIRVTranslateModule(Module *M, std::string &SpirvObj, std::string &ErrMsg, } } return SPIRVTranslate(M, SpirvObj, ErrMsg, AllowExtNames, OLevel, - TargetTriple); + std::move(TargetTriple)); } } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index 1ebfde2..c2a6e51 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -50,7 +50,8 @@ class SPIRVAsmPrinter : public AsmPrinter { public: explicit SPIRVAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer), ID), ST(nullptr), TII(nullptr) {} + : AsmPrinter(TM, std::move(Streamer), ID), ModuleSectionsEmitted(false), + ST(nullptr), TII(nullptr), MAI(nullptr) {} static char ID; bool ModuleSectionsEmitted; const SPIRVSubtarget *ST; @@ -591,7 +592,9 @@ void SPIRVAsmPrinter::outputAnnotations(const Module &M) { cast<GlobalVariable>(CS->getOperand(1)->stripPointerCasts()); StringRef AnnotationString; - getConstantStringInfo(GV, AnnotationString); + [[maybe_unused]] bool Success = + getConstantStringInfo(GV, AnnotationString); + assert(Success && "Failed to get annotation string"); MCInst Inst; Inst.setOpcode(SPIRV::OpDecorate); Inst.addOperand(MCOperand::createReg(Reg)); diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 25cdf72..e6e86b7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -51,7 +51,7 @@ struct IncomingCall { IncomingCall(const std::string BuiltinName, const DemangledBuiltin *Builtin, const Register ReturnRegister, const SPIRVType *ReturnType, const SmallVectorImpl<Register> &Arguments) - : BuiltinName(BuiltinName), Builtin(Builtin), + : BuiltinName(std::move(BuiltinName)), Builtin(Builtin), ReturnRegister(ReturnRegister), ReturnType(ReturnType), Arguments(Arguments) {} @@ -2619,6 +2619,7 @@ static bool generateConvertInst(const StringRef DemangledCall, GR->getSPIRVTypeID(Call->ReturnType)); } + assert(Builtin && "Conversion builtin not found."); if (Builtin->IsSaturated) 
buildOpDecorate(Call->ReturnRegister, MIRBuilder, SPIRV::Decoration::SaturatedConversion, {}); diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 2c3e087..f5a49e2 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -499,7 +499,7 @@ void SPIRVEmitIntrinsics::propagateElemTypeRec( std::unordered_set<Value *> Visited; DenseMap<Function *, CallInst *> Ptrcasts; propagateElemTypeRec(Op, PtrElemTy, CastElemTy, VisitedSubst, Visited, - Ptrcasts); + std::move(Ptrcasts)); } void SPIRVEmitIntrinsics::propagateElemTypeRec( @@ -897,17 +897,16 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper( bool Change = false; for (unsigned i = 0; i < U->getNumOperands(); ++i) { Value *Op = U->getOperand(i); + assert(Op && "Operands should not be null."); Type *OpTy = Op->getType(); Type *Ty = OpTy; - if (Op) { - if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) { - if (Type *NestedTy = - deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8)) - Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace()); - } else { - Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited, - UnknownElemTypeI8); - } + if (auto *PtrTy = dyn_cast<PointerType>(OpTy)) { + if (Type *NestedTy = + deduceElementTypeHelper(Op, Visited, UnknownElemTypeI8)) + Ty = getTypedPointerWrapper(NestedTy, PtrTy->getAddressSpace()); + } else { + Ty = deduceNestedTypeHelper(dyn_cast<User>(Op), OpTy, Visited, + UnknownElemTypeI8); } Tys.push_back(Ty); Change |= Ty != OpTy; diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp index 7f0d636..275463e 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitNonSemanticDI.cpp @@ -116,6 +116,7 @@ bool SPIRVEmitNonSemanticDI::emitGlobalDI(MachineFunction &MF) { } } const NamedMDNode *ModuleFlags = M->getNamedMetadata("llvm.module.flags"); + assert(ModuleFlags && "Expected llvm.module.flags metadata to be present"); for (const auto *Op : ModuleFlags->operands()) { const MDOperand &MaybeStrOp = Op->getOperand(1); if (MaybeStrOp.equalsStr("Dwarf Version")) diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index f1436d5..cfe24c8 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -87,7 +87,7 @@ storageClassRequiresExplictLayout(SPIRV::StorageClass::StorageClass SC) { } SPIRVGlobalRegistry::SPIRVGlobalRegistry(unsigned PointerSize) - : PointerSize(PointerSize), Bound(0) {} + : PointerSize(PointerSize), Bound(0), CurMF(nullptr) {} SPIRVType *SPIRVGlobalRegistry::assignIntTypeToVReg(unsigned BitWidth, Register VReg, @@ -474,8 +474,8 @@ Register SPIRVGlobalRegistry::getOrCreateBaseRegister( } if (Type->getOpcode() == SPIRV::OpTypeFloat) { SPIRVType *SpvBaseType = getOrCreateSPIRVFloatType(BitWidth, I, TII); - return getOrCreateConstFP(dyn_cast<ConstantFP>(Val)->getValue(), I, - SpvBaseType, TII, ZeroAsNull); + return getOrCreateConstFP(cast<ConstantFP>(Val)->getValue(), I, SpvBaseType, + TII, ZeroAsNull); } assert(Type->getOpcode() == SPIRV::OpTypeInt); SPIRVType *SpvBaseType = getOrCreateSPIRVIntegerType(BitWidth, I, TII); @@ -1069,7 +1069,8 @@ SPIRVType *SPIRVGlobalRegistry::createSPIRVType( MIRBuilder); }; } - return getOpTypeStruct(SType, MIRBuilder, AccQual, Decorator, EmitIR); + return getOpTypeStruct(SType, MIRBuilder, AccQual, std::move(Decorator), + EmitIR); } 
if (auto FType = dyn_cast<FunctionType>(Ty)) { SPIRVType *RetTy = findSPIRVType(FType->getReturnType(), MIRBuilder, @@ -1406,8 +1407,9 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateLayoutType( // We need a new OpTypeStruct instruction because decorations will be // different from a struct with an explicit layout created from a different // entry point. - SPIRVType *SPIRVStructType = getOpTypeStruct( - ST, MIRBuilder, SPIRV::AccessQualifier::None, Decorator, EmitIr); + SPIRVType *SPIRVStructType = + getOpTypeStruct(ST, MIRBuilder, SPIRV::AccessQualifier::None, + std::move(Decorator), EmitIr); add(Key, SPIRVStructType); return SPIRVStructType; } diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index e9f5ffa..5259db1 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -362,6 +362,7 @@ SPIRVInstructionSelector::SPIRVInstructionSelector(const SPIRVTargetMachine &TM, const RegisterBankInfo &RBI) : InstructionSelector(), STI(ST), TII(*ST.getInstrInfo()), TRI(*ST.getRegisterInfo()), RBI(RBI), GR(*ST.getSPIRVGlobalRegistry()), + MRI(nullptr), #define GET_GLOBALISEL_PREDICATES_INIT #include "SPIRVGenGlobalISel.inc" #undef GET_GLOBALISEL_PREDICATES_INIT @@ -3574,7 +3575,7 @@ bool SPIRVInstructionSelector::selectFirstBitSet64Overflow( // Join all the resulting registers back into the return type in order // (ie i32x2, i32x2, i32x1 -> i32x5) - return selectOpWithSrcs(ResVReg, ResType, I, PartialRegs, + return selectOpWithSrcs(ResVReg, ResType, I, std::move(PartialRegs), SPIRV::OpCompositeConstruct); } diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp new file mode 100644 index 0000000..0398e52 --- /dev/null +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizeImplicitBinding.cpp @@ -0,0 +1,159 @@ +//===- SPIRVLegalizeImplicitBinding.cpp - Legalize implicit bindings ----*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass legalizes the @llvm.spv.resource.handlefromimplicitbinding +// intrinsic by replacing it with a call to +// @llvm.spv.resource.handlefrombinding. +// +//===----------------------------------------------------------------------===// + +#include "SPIRV.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include <algorithm> +#include <vector> + +using namespace llvm; + +namespace { +class SPIRVLegalizeImplicitBinding : public ModulePass { +public: + static char ID; + SPIRVLegalizeImplicitBinding() : ModulePass(ID) {} + + bool runOnModule(Module &M) override; + +private: + void collectBindingInfo(Module &M); + uint32_t getAndReserveFirstUnusedBinding(uint32_t DescSet); + void replaceImplicitBindingCalls(Module &M); + + // A map from descriptor set to a bit vector of used binding numbers. + std::vector<BitVector> UsedBindings; + // A list of all implicit binding calls, to be sorted by order ID. 
+ SmallVector<CallInst *, 16> ImplicitBindingCalls; +}; + +struct BindingInfoCollector : public InstVisitor<BindingInfoCollector> { + std::vector<BitVector> &UsedBindings; + SmallVector<CallInst *, 16> &ImplicitBindingCalls; + + BindingInfoCollector(std::vector<BitVector> &UsedBindings, + SmallVector<CallInst *, 16> &ImplicitBindingCalls) + : UsedBindings(UsedBindings), ImplicitBindingCalls(ImplicitBindingCalls) { + } + + void visitCallInst(CallInst &CI) { + if (CI.getIntrinsicID() == Intrinsic::spv_resource_handlefrombinding) { + const uint32_t DescSet = + cast<ConstantInt>(CI.getArgOperand(0))->getZExtValue(); + const uint32_t Binding = + cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue(); + + if (UsedBindings.size() <= DescSet) { + UsedBindings.resize(DescSet + 1); + UsedBindings[DescSet].resize(64); + } + if (UsedBindings[DescSet].size() <= Binding) { + UsedBindings[DescSet].resize(2 * Binding + 1); + } + UsedBindings[DescSet].set(Binding); + } else if (CI.getIntrinsicID() == + Intrinsic::spv_resource_handlefromimplicitbinding) { + ImplicitBindingCalls.push_back(&CI); + } + } +}; + +void SPIRVLegalizeImplicitBinding::collectBindingInfo(Module &M) { + BindingInfoCollector InfoCollector(UsedBindings, ImplicitBindingCalls); + InfoCollector.visit(M); + + // Sort the collected calls by their order ID. + std::sort( + ImplicitBindingCalls.begin(), ImplicitBindingCalls.end(), + [](const CallInst *A, const CallInst *B) { + const uint32_t OrderIdArgIdx = 0; + const uint32_t OrderA = + cast<ConstantInt>(A->getArgOperand(OrderIdArgIdx))->getZExtValue(); + const uint32_t OrderB = + cast<ConstantInt>(B->getArgOperand(OrderIdArgIdx))->getZExtValue(); + return OrderA < OrderB; + }); +} + +uint32_t SPIRVLegalizeImplicitBinding::getAndReserveFirstUnusedBinding( + uint32_t DescSet) { + if (UsedBindings.size() <= DescSet) { + UsedBindings.resize(DescSet + 1); + UsedBindings[DescSet].resize(64); + } + + int NewBinding = UsedBindings[DescSet].find_first_unset(); + if (NewBinding == -1) { + NewBinding = UsedBindings[DescSet].size(); + UsedBindings[DescSet].resize(2 * NewBinding + 1); + } + + UsedBindings[DescSet].set(NewBinding); + return NewBinding; +}
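getAndReserveFirstUnusedBinding above is a first-fit scan over a per-descriptor-set bitmap that grows the bitmap when it is saturated. A standalone model of the same allocation policy, with std::vector<bool> standing in for llvm::BitVector:

    #include <cstdint>
    #include <vector>

    // First-fit binding allocator: one bit per binding number, per
    // descriptor set. Mirrors the BitVector logic above in portable C++.
    static std::vector<std::vector<bool>> UsedBindingsModel;

    uint32_t reserveFirstUnused(uint32_t DescSet) {
      if (UsedBindingsModel.size() <= DescSet)
        UsedBindingsModel.resize(DescSet + 1);
      auto &Set = UsedBindingsModel[DescSet];
      for (uint32_t I = 0; I < Set.size(); ++I)
        if (!Set[I]) {
          Set[I] = true;
          return I;
        }
      // Every tracked binding is taken: the next free one is past the end,
      // so grow the bitmap (doubling, like the pass) and claim it.
      uint32_t NewBinding = static_cast<uint32_t>(Set.size());
      Set.resize(2 * NewBinding + 2, false);
      Set[NewBinding] = true;
      return NewBinding;
    }

+ +void SPIRVLegalizeImplicitBinding::replaceImplicitBindingCalls(Module &M) { + for (CallInst *OldCI : ImplicitBindingCalls) { + IRBuilder<> Builder(OldCI); + const uint32_t DescSet = + cast<ConstantInt>(OldCI->getArgOperand(1))->getZExtValue(); + const uint32_t NewBinding = getAndReserveFirstUnusedBinding(DescSet); + + SmallVector<Value *, 8> Args; + Args.push_back(Builder.getInt32(DescSet)); + Args.push_back(Builder.getInt32(NewBinding)); + + // Copy the remaining arguments from the old call.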
+ for (uint32_t i = 2; i < OldCI->arg_size(); ++i) { + Args.push_back(OldCI->getArgOperand(i)); + } + + Function *NewFunc = Intrinsic::getOrInsertDeclaration( + &M, Intrinsic::spv_resource_handlefrombinding, OldCI->getType()); + CallInst *NewCI = Builder.CreateCall(NewFunc, Args); + NewCI->setCallingConv(OldCI->getCallingConv()); + + OldCI->replaceAllUsesWith(NewCI); + OldCI->eraseFromParent(); + } +} + +bool SPIRVLegalizeImplicitBinding::runOnModule(Module &M) { + collectBindingInfo(M); + if (ImplicitBindingCalls.empty()) { + return false; + } + + replaceImplicitBindingCalls(M); + return true; +} +} // namespace + +char SPIRVLegalizeImplicitBinding::ID = 0; + +INITIALIZE_PASS(SPIRVLegalizeImplicitBinding, "legalize-spirv-implicit-binding", + "Legalize SPIR-V implicit bindings", false, false) + +ModulePass *llvm::createSPIRVLegalizeImplicitBindingPass() { + return new SPIRVLegalizeImplicitBinding(); +}
\ No newline at end of file diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index ab06fc0..8039cf0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -93,7 +93,7 @@ getSymbolicOperandRequirements(SPIRV::OperandCategory::OperandCategory Category, if (Reqs.isCapabilityAvailable(Cap)) { ReqExts.append(getSymbolicOperandExtensions( SPIRV::OperandCategory::CapabilityOperand, Cap)); - return {true, {Cap}, ReqExts, ReqMinVer, ReqMaxVer}; + return {true, {Cap}, std::move(ReqExts), ReqMinVer, ReqMaxVer}; } } else { // By SPIR-V specification: "If an instruction, enumerant, or other @@ -111,7 +111,7 @@ getSymbolicOperandRequirements(SPIRV::OperandCategory::OperandCategory Category, if (i == Sz - 1 || !AvoidCaps.S.contains(Cap)) { ReqExts.append(getSymbolicOperandExtensions( SPIRV::OperandCategory::CapabilityOperand, Cap)); - return {true, {Cap}, ReqExts, ReqMinVer, ReqMaxVer}; + return {true, {Cap}, std::move(ReqExts), ReqMinVer, ReqMaxVer}; } } } @@ -558,7 +558,7 @@ static void collectOtherInstr(MachineInstr &MI, SPIRV::ModuleAnalysisInfo &MAI, bool Append = true) { MAI.setSkipEmission(&MI); InstrSignature MISign = instrToSignature(MI, MAI, true); - auto FoundMI = IS.insert(MISign); + auto FoundMI = IS.insert(std::move(MISign)); if (!FoundMI.second) return; // insert failed, so we found a duplicate; don't add it to MAI.MS // No duplicates, so add it. diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h index a0d47cb..41c792a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h @@ -54,8 +54,8 @@ struct Requirements { std::optional<Capability::Capability> Cap = {}, ExtensionList Exts = {}, VersionTuple MinVer = VersionTuple(), VersionTuple MaxVer = VersionTuple()) - : IsSatisfiable(IsSatisfiable), Cap(Cap), Exts(Exts), MinVer(MinVer), - MaxVer(MaxVer) {} + : IsSatisfiable(IsSatisfiable), Cap(Cap), Exts(std::move(Exts)), + MinVer(MinVer), MaxVer(MaxVer) {} Requirements(Capability::Capability Cap) : Requirements(true, {Cap}) {} }; @@ -217,7 +217,8 @@ struct SPIRVModuleAnalysis : public ModulePass { static char ID; public: - SPIRVModuleAnalysis() : ModulePass(ID) {} + SPIRVModuleAnalysis() + : ModulePass(ID), ST(nullptr), GR(nullptr), TII(nullptr), MMI(nullptr) {} bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override; diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp index 1d38244..d17528d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp @@ -147,7 +147,7 @@ void visit(MachineFunction &MF, MachineBasicBlock &Start, // Do a preorder traversal of the CFG starting from the given function's entry // point. Calls |op| on each basic block encountered during the traversal. 
void visit(MachineFunction &MF, std::function<void(MachineBasicBlock *)> op) { - visit(MF, *MF.begin(), op); + visit(MF, *MF.begin(), std::move(op)); } bool SPIRVPostLegalizer::runOnMachineFunction(MachineFunction &MF) { diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index f4b4846..b62db7f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -99,6 +99,7 @@ addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR, SPIRVType *ExtType = GR->getOrCreateSPIRVType( Const->getType(), MIB, SPIRV::AccessQualifier::ReadWrite, true); + assert(SrcMI && "Expected source instruction to be valid"); SrcMI->setDesc(STI.getInstrInfo()->get(SPIRV::OpConstantNull)); SrcMI->addOperand(MachineOperand::CreateReg( GR->getSPIRVTypeID(ExtType), false)); diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp index 595424b..74aec4f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp @@ -234,7 +234,7 @@ static SmallVector<Metadata *> parseAnnotation(Value *I, return SmallVector<Metadata *>{}; MDs.push_back(MDNode::get(Ctx, MDsItem)); } - return Pos == static_cast<int>(Anno.length()) ? MDs + return Pos == static_cast<int>(Anno.length()) ? std::move(MDs) : SmallVector<Metadata *>{}; } diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index d7cf211..e0bfb77 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -226,6 +226,7 @@ void SPIRVPassConfig::addIRPasses() { } void SPIRVPassConfig::addISelPrepare() { + addPass(createSPIRVLegalizeImplicitBindingPass()); addPass(createSPIRVEmitIntrinsicsPass(&getTM<SPIRVTargetMachine>())); if (TM.getSubtargetImpl()->isLogicalSPIRV()) addPass(createSPIRVLegalizePointerCastPass(&getTM<SPIRVTargetMachine>())); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 416d811..820e56b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -463,8 +463,10 @@ std::string getOclOrSpirvBuiltinDemangledName(StringRef Name) { DemangledNameLenStart = NameSpaceStart + 11; } Start = Name.find_first_not_of("0123456789", DemangledNameLenStart); - Name.substr(DemangledNameLenStart, Start - DemangledNameLenStart) - .getAsInteger(10, Len); + [[maybe_unused]] bool Error = + Name.substr(DemangledNameLenStart, Start - DemangledNameLenStart) + .getAsInteger(10, Len); + assert(!Error && "Failed to parse demangled name length"); return Name.substr(Start, Len).str(); } @@ -756,7 +758,7 @@ bool getVacantFunctionName(Module &M, std::string &Name) { for (unsigned I = 0; I < MaxIters; ++I) { std::string OrdName = Name + Twine(I).str(); if (!M.getFunction(OrdName)) { - Name = OrdName; + Name = std::move(OrdName); return true; } } diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 6297916..5ee66e3 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -574,13 +574,11 @@ void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF, // Call mcount (Regmask from CC AnyReg since mcount preserves all normal // argument registers). 
- FunctionCallee FC = MF.getFunction().getParent()->getOrInsertFunction( - "mcount", Type::getVoidTy(MF.getFunction().getContext())); const uint32_t *Mask = MF.getSubtarget<SystemZSubtarget>() .getSpecialRegisters() ->getCallPreservedMask(MF, CallingConv::AnyReg); BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::CallBRASL)) - .addGlobalAddress(dyn_cast<Function>(FC.getCallee())) + .addExternalSymbol("mcount") .addRegMask(Mask); // Reload return address from 8 bytes above stack pointer. diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp index 2f92f86..39bec47 100644 --- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp +++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp @@ -145,39 +145,40 @@ struct DecodeRegister { }; const DecodeRegister SRDecoderTable[] = { - {Xtensa::LBEG, 0}, {Xtensa::LEND, 1}, - {Xtensa::LCOUNT, 2}, {Xtensa::SAR, 3}, - {Xtensa::BREG, 4}, {Xtensa::LITBASE, 5}, - {Xtensa::ACCLO, 16}, {Xtensa::ACCHI, 17}, - {Xtensa::M0, 32}, {Xtensa::M1, 33}, - {Xtensa::M2, 34}, {Xtensa::M3, 35}, - {Xtensa::WINDOWBASE, 72}, {Xtensa::WINDOWSTART, 73}, - {Xtensa::IBREAKENABLE, 96}, {Xtensa::MEMCTL, 97}, - {Xtensa::DDR, 104}, {Xtensa::IBREAKA0, 128}, - {Xtensa::IBREAKA1, 129}, {Xtensa::DBREAKA0, 144}, - {Xtensa::DBREAKA1, 145}, {Xtensa::DBREAKC0, 160}, - {Xtensa::DBREAKC1, 161}, {Xtensa::CONFIGID0, 176}, - {Xtensa::EPC1, 177}, {Xtensa::EPC2, 178}, - {Xtensa::EPC3, 179}, {Xtensa::EPC4, 180}, - {Xtensa::EPC5, 181}, {Xtensa::EPC6, 182}, - {Xtensa::EPC7, 183}, {Xtensa::DEPC, 192}, - {Xtensa::EPS2, 194}, {Xtensa::EPS3, 195}, - {Xtensa::EPS4, 196}, {Xtensa::EPS5, 197}, - {Xtensa::EPS6, 198}, {Xtensa::EPS7, 199}, - {Xtensa::CONFIGID1, 208}, {Xtensa::EXCSAVE1, 209}, - {Xtensa::EXCSAVE2, 210}, {Xtensa::EXCSAVE3, 211}, - {Xtensa::EXCSAVE4, 212}, {Xtensa::EXCSAVE5, 213}, - {Xtensa::EXCSAVE6, 214}, {Xtensa::EXCSAVE7, 215}, - {Xtensa::CPENABLE, 224}, {Xtensa::INTERRUPT, 226}, - {Xtensa::INTCLEAR, 227}, {Xtensa::INTENABLE, 228}, - {Xtensa::PS, 230}, {Xtensa::VECBASE, 231}, - {Xtensa::EXCCAUSE, 232}, {Xtensa::DEBUGCAUSE, 233}, - {Xtensa::CCOUNT, 234}, {Xtensa::PRID, 235}, - {Xtensa::ICOUNT, 236}, {Xtensa::ICOUNTLEVEL, 237}, - {Xtensa::EXCVADDR, 238}, {Xtensa::CCOMPARE0, 240}, - {Xtensa::CCOMPARE1, 241}, {Xtensa::CCOMPARE2, 242}, - {Xtensa::MISC0, 244}, {Xtensa::MISC1, 245}, - {Xtensa::MISC2, 246}, {Xtensa::MISC3, 247}}; + {Xtensa::LBEG, 0}, {Xtensa::LEND, 1}, + {Xtensa::LCOUNT, 2}, {Xtensa::SAR, 3}, + {Xtensa::BREG, 4}, {Xtensa::LITBASE, 5}, + {Xtensa::SCOMPARE1, 12}, {Xtensa::ACCLO, 16}, + {Xtensa::ACCHI, 17}, {Xtensa::M0, 32}, + {Xtensa::M1, 33}, {Xtensa::M2, 34}, + {Xtensa::M3, 35}, {Xtensa::WINDOWBASE, 72}, + {Xtensa::WINDOWSTART, 73}, {Xtensa::IBREAKENABLE, 96}, + {Xtensa::MEMCTL, 97}, {Xtensa::ATOMCTL, 99}, + {Xtensa::DDR, 104}, {Xtensa::IBREAKA0, 128}, + {Xtensa::IBREAKA1, 129}, {Xtensa::DBREAKA0, 144}, + {Xtensa::DBREAKA1, 145}, {Xtensa::DBREAKC0, 160}, + {Xtensa::DBREAKC1, 161}, {Xtensa::CONFIGID0, 176}, + {Xtensa::EPC1, 177}, {Xtensa::EPC2, 178}, + {Xtensa::EPC3, 179}, {Xtensa::EPC4, 180}, + {Xtensa::EPC5, 181}, {Xtensa::EPC6, 182}, + {Xtensa::EPC7, 183}, {Xtensa::DEPC, 192}, + {Xtensa::EPS2, 194}, {Xtensa::EPS3, 195}, + {Xtensa::EPS4, 196}, {Xtensa::EPS5, 197}, + {Xtensa::EPS6, 198}, {Xtensa::EPS7, 199}, + {Xtensa::CONFIGID1, 208}, {Xtensa::EXCSAVE1, 209}, + {Xtensa::EXCSAVE2, 210}, {Xtensa::EXCSAVE3, 211}, + {Xtensa::EXCSAVE4, 212}, {Xtensa::EXCSAVE5, 213}, + {Xtensa::EXCSAVE6, 214}, 
{Xtensa::EXCSAVE7, 215}, + {Xtensa::CPENABLE, 224}, {Xtensa::INTERRUPT, 226}, + {Xtensa::INTCLEAR, 227}, {Xtensa::INTENABLE, 228}, + {Xtensa::PS, 230}, {Xtensa::VECBASE, 231}, + {Xtensa::EXCCAUSE, 232}, {Xtensa::DEBUGCAUSE, 233}, + {Xtensa::CCOUNT, 234}, {Xtensa::PRID, 235}, + {Xtensa::ICOUNT, 236}, {Xtensa::ICOUNTLEVEL, 237}, + {Xtensa::EXCVADDR, 238}, {Xtensa::CCOMPARE0, 240}, + {Xtensa::CCOMPARE1, 241}, {Xtensa::CCOMPARE2, 242}, + {Xtensa::MISC0, 244}, {Xtensa::MISC1, 245}, + {Xtensa::MISC2, 246}, {Xtensa::MISC3, 247}}; static DecodeStatus DecodeSRRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp index 821cba0..080a9c0 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp @@ -200,6 +200,9 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits, case Xtensa::WINDOWBASE: case Xtensa::WINDOWSTART: return FeatureBits[Xtensa::FeatureWindowed]; + case Xtensa::ATOMCTL: + case Xtensa::SCOMPARE1: + return FeatureBits[Xtensa::FeatureS32C1I]; case Xtensa::NoRegister: return false; } diff --git a/llvm/lib/Target/Xtensa/XtensaFeatures.td b/llvm/lib/Target/Xtensa/XtensaFeatures.td index 97d5472..d6f3ef0 100644 --- a/llvm/lib/Target/Xtensa/XtensaFeatures.td +++ b/llvm/lib/Target/Xtensa/XtensaFeatures.td @@ -73,6 +73,22 @@ def FeatureDiv32 : SubtargetFeature<"div32", "HasDiv32", "true", def HasDiv32 : Predicate<"Subtarget->hasDiv32()">, AssemblerPredicate<(all_of FeatureDiv32)>; +def FeatureS32C1I : SubtargetFeature<"s32c1i", "HasS32C1I", "true", + "Enable Xtensa S32C1I option">; +def HasS32C1I : Predicate<"Subtarget->hasS32C1I()">, + AssemblerPredicate<(all_of FeatureS32C1I)>; + +// Assume that lock-free native-width atomics are available, even if the target +// and operating system combination would not usually provide them. The user +// is responsible for providing any necessary __sync implementations. Code +// built with this feature is not ABI-compatible with code built without this +// feature, if atomic variables are exposed across the ABI boundary. +def FeatureForcedAtomics : SubtargetFeature<"forced-atomics", "HasForcedAtomics", "true", + "Assume that lock-free native-width atomics are available">; +def HasForcedAtomics : Predicate<"Subtarget->hasForcedAtomics()">, + AssemblerPredicate<(all_of FeatureForcedAtomics)>; +def HasAtomicLdSt : Predicate<"Subtarget->hasS32C1I() || Subtarget->hasForcedAtomics()">; + def FeatureRegionProtection : SubtargetFeature<"regprotect", "HasRegionProtection", "true", "Enable Xtensa Region Protection option">; def HasRegionProtection : Predicate<"Subtarget->hasRegionProtection()">,
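For context on the S32C1I option gated above: the instruction performs a 32-bit compare-and-swap whose expected value is taken from SCOMPARE1, and it always yields the value observed in memory. A portable C++ model of one such operation (an analogy via std::atomic, not the hardware definition):

    #include <atomic>
    #include <cstdint>

    // Model of one S32C1I operation: SCOMPARE1 holds the expected value; the
    // instruction stores Swap only if *Addr == SCOMPARE1 and always returns
    // the old memory value.
    uint32_t s32c1iModel(std::atomic<uint32_t> *Addr, uint32_t Scompare1,
                         uint32_t Swap) {
      uint32_t Observed = Scompare1;
      // compare_exchange_strong leaves the observed value in 'Observed' on
      // failure, matching the "always return the old value" behavior.
      Addr->compare_exchange_strong(Observed, Swap);
      return Observed;
    }

diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index fd42fd2..6a07bd8 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -250,6 +250,15 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, // Floating-point truncation and stores need to be done separately.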
setTruncStoreAction(MVT::f64, MVT::f32, Expand); + if (Subtarget.hasS32C1I()) { + setMaxAtomicSizeInBitsSupported(32); + setMinCmpXchgSizeInBits(32); + } else if (Subtarget.hasForcedAtomics()) { + setMaxAtomicSizeInBitsSupported(32); + } else { + setMaxAtomicSizeInBitsSupported(0); + } + // Compute derived properties from the register classes computeRegisterProperties(STI.getRegisterInfo()); } @@ -1548,6 +1557,11 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } +TargetLowering::AtomicExpansionKind +XtensaTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + return AtomicExpansionKind::CmpXChg; +} + //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// @@ -1696,6 +1710,23 @@ MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter( return MBB; } + case Xtensa::ATOMIC_CMP_SWAP_32_P: { + MachineOperand &R = MI.getOperand(0); + MachineOperand &Addr = MI.getOperand(1); + MachineOperand &Cmp = MI.getOperand(2); + MachineOperand &Swap = MI.getOperand(3); + + BuildMI(*MBB, MI, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1) + .addReg(Cmp.getReg()); + + BuildMI(*MBB, MI, DL, TII.get(Xtensa::S32C1I), R.getReg()) + .addReg(Swap.getReg()) + .addReg(Addr.getReg()) + .addImm(0); + + MI.eraseFromParent(); + return MBB; + } default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index e6ddf98..d84cbdb 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -145,6 +145,12 @@ public: const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return true; + } + + AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override; diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index 31608f4..edcf247 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -496,6 +496,8 @@ def EXTW : RRR_Inst<0x00, 0x00, 0x00, (outs), (ins), let hasSideEffects = 1; } +def : Pat<(atomic_fence timm, timm), (MEMW)>; + //===----------------------------------------------------------------------===// // Illegal instructions //===----------------------------------------------------------------------===// @@ -1499,6 +1501,46 @@ def RFI : RRR_Inst<0x00, 0x00, 0x00, (outs), (ins uimm4:$imm), } //===----------------------------------------------------------------------===// +// S32C1I +//===----------------------------------------------------------------------===// + +let mayStore = 1, mayLoad = 1, Predicates = [HasS32C1I] in { + def S32C1I : RRI8_Inst<0x02, (outs AR:$a), (ins AR:$t, mem32:$addr), + "s32c1i\t$t, $addr", []> { + bits<12> addr; + + let r = 0x0e; + let Uses = [SCOMPARE1]; + let Constraints = "$a = $t"; + let imm8{7-0} = addr{11-4}; + let s{3-0} = addr{3-0}; + } +} + +//===----------------------------------------------------------------------===// +// Atomic patterns +//===----------------------------------------------------------------------===// + +// Atomic load/store are available under both +s32c1i and +force-atomics. 
+// Fences will be inserted for atomic load/stores according to the logic in +// XtensaTargetLowering. +let Predicates = [HasAtomicLdSt] in { + def : Pat<(i32 (atomic_load_8 addr_ish1:$addr)), (L8UI addr_ish1:$addr)>; + def : Pat<(i32 (atomic_load_16 addr_ish2:$addr)), (L16UI addr_ish2:$addr)>; + def : Pat<(i32 (atomic_load_32 addr_ish4:$addr)), (L32I addr_ish4:$addr)>; + + def : Pat<(atomic_store_8 AR:$t, addr_ish1:$addr), (S8I AR:$t, addr_ish1:$addr)>; + def : Pat<(atomic_store_16 AR:$t, addr_ish2:$addr), (S16I AR:$t, addr_ish2:$addr)>; + def : Pat<(atomic_store_32 AR:$t, addr_ish4:$addr), (S32I AR:$t, addr_ish4:$addr)>; +} + +let usesCustomInserter = 1, Predicates = [HasS32C1I] in { + def ATOMIC_CMP_SWAP_32_P : Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap), + "!atomic_cmp_swap_32_p, $dst, $ptr, $cmp, $swap", + [(set AR:$dst, (atomic_cmp_swap_i32 AR:$ptr, AR:$cmp, AR:$swap))]>; +} + +//===----------------------------------------------------------------------===// // DSP Instructions //===----------------------------------------------------------------------===// include "XtensaDSPInstrInfo.td" diff --git a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td index 596c410..d1f2c6b 100644 --- a/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaRegisterInfo.td @@ -84,6 +84,9 @@ def SAR : SRReg<3, "sar", ["SAR","3"]>; // Boolean Register def BREG : SRReg<4, "br", ["BR","4"]>; +// Expected data value for S32C1I operation +def SCOMPARE1 : SRReg<12, "scompare1", ["SCOMPARE1", "12"]>; + // Literal base def LITBASE : SRReg<5, "litbase", ["LITBASE", "5"]>; @@ -97,6 +100,9 @@ def IBREAKENABLE : SRReg<96, "ibreakenable", ["IBREAKENABLE", "96"]>; // Memory Control Register def MEMCTL : SRReg<97, "memctl", ["MEMCTL", "97"]>; +// Atomic Operation Control +def ATOMCTL : SRReg<99, "atomctl", ["ATOMCTL", "99"]>; + def DDR : SRReg<104, "ddr", ["DDR", "104"]>; // Instruction break address register 0 @@ -218,8 +224,8 @@ def MR23 : RegisterClass<"Xtensa", [i32], 32, (add M2, M3)>; def MR : RegisterClass<"Xtensa", [i32], 32, (add MR01, MR23)>; def SR : RegisterClass<"Xtensa", [i32], 32, (add - LBEG, LEND, LCOUNT, SAR, BREG, LITBASE, ACCLO, ACCHI, MR, - WINDOWBASE, WINDOWSTART, IBREAKENABLE, MEMCTL, DDR, IBREAKA0, IBREAKA1, + LBEG, LEND, LCOUNT, SAR, BREG, SCOMPARE1, LITBASE, ACCLO, ACCHI, MR, + WINDOWBASE, WINDOWSTART, IBREAKENABLE, MEMCTL, ATOMCTL, DDR, IBREAKA0, IBREAKA1, DBREAKA0, DBREAKA1, DBREAKC0, DBREAKC1, CONFIGID0, EPC1, EPC2, EPC3, EPC4, EPC5, EPC6, EPC7, DEPC, EPS2, EPS3, EPS4, EPS5, EPS6, EPS7, CONFIGID1, EXCSAVE1, EXCSAVE2, EXCSAVE3, EXCSAVE4, EXCSAVE5, EXCSAVE6, EXCSAVE7, CPENABLE, INTERRUPT, INTSET, INTCLEAR, INTENABLE, diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h b/llvm/lib/Target/Xtensa/XtensaSubtarget.h index fd677a4..b406534 100644 --- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h +++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h @@ -77,6 +77,8 @@ public: bool hasMul32() const { return HasMul32; } bool hasMul32High() const { return HasMul32High; } bool hasDiv32() const { return HasDiv32; } + bool hasS32C1I() const { return HasS32C1I; } + bool hasForcedAtomics() const { return HasForcedAtomics; } bool hasSingleFloat() const { return HasSingleFloat; } bool hasRegionProtection() const { return HasRegionProtection; } bool hasRelocatableVector() const { return HasRelocatableVector; }
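Because shouldExpandAtomicRMWInIR returns CmpXChg (see the XtensaISelLowering hunk above) and addIRPasses schedules AtomicExpand below, every atomicrmw is rewritten as a compare-exchange retry loop before instruction selection. Its shape, sketched with std::atomic for an atomic add:

    #include <atomic>
    #include <cstdint>

    // Shape of the expansion AtomicExpand emits for 'atomicrmw add' when the
    // target asks for CmpXChg: reload and retry until the CAS succeeds.
    uint32_t atomicAddViaCas(std::atomic<uint32_t> &Mem, uint32_t Val) {
      uint32_t Old = Mem.load();
      while (!Mem.compare_exchange_weak(Old, Old + Val)) {
        // 'Old' was refreshed with the current memory value; retry.
      }
      return Old;
    }

diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp index 8d2dca6..c9f1ca8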
100644 --- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp +++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp @@ -107,6 +107,7 @@ public: } bool addInstSelector() override; + void addIRPasses() override; void addPreEmitPass() override; }; } // end anonymous namespace @@ -116,6 +117,11 @@ bool XtensaPassConfig::addInstSelector() { return false; } +void XtensaPassConfig::addIRPasses() { + addPass(createAtomicExpandLegacyPass()); + TargetPassConfig::addIRPasses(); +} + void XtensaPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } TargetPassConfig *XtensaTargetMachine::createPassConfig(PassManagerBase &PM) { diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 7e09d30..79c40c3 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -11,7 +11,9 @@ //===----------------------------------------------------------------------===// #include "llvm/TargetParser/Host.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" @@ -167,35 +169,10 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { .Default(generic); } -StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { - // The cpuid register on arm is not accessible from user space. On Linux, - // it is exposed through the /proc/cpuinfo file. - - // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line - // in all cases. - SmallVector<StringRef, 32> Lines; - ProcCpuinfoContent.split(Lines, '\n'); - - // Look for the CPU implementer and hardware lines, and store the CPU part - // numbers found. - StringRef Implementer; - StringRef Hardware; - SmallVector<StringRef, 32> Parts; - for (StringRef Line : Lines) { - if (Line.consume_front("CPU implementer")) - Implementer = Line.ltrim("\t :"); - else if (Line.consume_front("Hardware")) - Hardware = Line.ltrim("\t :"); - else if (Line.consume_front("CPU part")) - Parts.emplace_back(Line.ltrim("\t :")); - } - - // Last `Part' seen, in case we don't analyse all `Parts' parsed. - StringRef Part = Parts.empty() ? StringRef() : Parts.back(); - - // Remove duplicate `Parts'. - llvm::sort(Parts); - Parts.erase(llvm::unique(Parts), Parts.end()); +StringRef +getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware, + StringRef Part, ArrayRef<StringRef> Parts, + function_ref<unsigned()> GetVariant) { auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) { if (Parts.size() == 2) @@ -343,21 +320,17 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. // The Exynos chips have a convoluted ID scheme that doesn't seem to follow // any predictive pattern across variants and parts. - unsigned Variant = 0, Part = 0; // Look for the CPU variant line, whose value is a 1 digit hexadecimal // number, corresponding to the Variant bits in the CP15/C0 register. - for (auto I : Lines) - if (I.consume_front("CPU variant")) - I.ltrim("\t :").getAsInteger(0, Variant); + unsigned Variant = GetVariant(); - // Look for the CPU part line, whose value is a 3 digit hexadecimal - // number, corresponding to the PartNum bits in the CP15/C0 register. 
- for (auto I : Lines) - if (I.consume_front("CPU part")) - I.ltrim("\t :").getAsInteger(0, Part); + // Convert the CPU part line, whose value is a 3 digit hexadecimal number, + // corresponding to the PartNum bits in the CP15/C0 register. + unsigned PartAsInt; + Part.getAsInteger(0, PartAsInt); - unsigned Exynos = (Variant << 12) | Part; + unsigned Exynos = (Variant << 12) | PartAsInt; switch (Exynos) { default: // Default by falling through to Exynos M3. @@ -416,6 +389,86 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { return "generic"; } +StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { + // The cpuid register on arm is not accessible from user space. On Linux, + // it is exposed through the /proc/cpuinfo file. + + // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line + // in all cases. + SmallVector<StringRef, 32> Lines; + ProcCpuinfoContent.split(Lines, '\n'); + + // Look for the CPU implementer and hardware lines, and store the CPU part + // numbers found. + StringRef Implementer; + StringRef Hardware; + SmallVector<StringRef, 32> Parts; + for (StringRef Line : Lines) { + if (Line.consume_front("CPU implementer")) + Implementer = Line.ltrim("\t :"); + else if (Line.consume_front("Hardware")) + Hardware = Line.ltrim("\t :"); + else if (Line.consume_front("CPU part")) + Parts.emplace_back(Line.ltrim("\t :")); + } + + // Last `Part' seen, in case we don't analyse all `Parts' parsed. + StringRef Part = Parts.empty() ? StringRef() : Parts.back(); + + // Remove duplicate `Parts'. + llvm::sort(Parts); + Parts.erase(llvm::unique(Parts), Parts.end()); + + auto GetVariant = [&]() { + unsigned Variant = 0; + for (auto I : Lines) + if (I.consume_front("CPU variant")) + I.ltrim("\t :").getAsInteger(0, Variant); + return Variant; + }; + + return getHostCPUNameForARMFromComponents(Implementer, Hardware, Part, Parts, + GetVariant); +} + +StringRef sys::detail::getHostCPUNameForARM(uint64_t PrimaryCpuInfo, + ArrayRef<uint64_t> UniqueCpuInfos) { + // On Windows, the registry provides cached copies of the MIDR_EL1 register. + union MIDR_EL1 { + uint64_t Raw; + struct _Components { + uint64_t Revision : 4; + uint64_t Partnum : 12; + uint64_t Architecture : 4; + uint64_t Variant : 4; + uint64_t Implementer : 8; + uint64_t Reserved : 32; + } Components; + }; + + SmallVector<std::string> PartsHolder; + PartsHolder.reserve(UniqueCpuInfos.size()); + for (auto Info : UniqueCpuInfos) + PartsHolder.push_back("0x" + utohexstr(MIDR_EL1{Info}.Components.Partnum, + /*LowerCase*/ true, + /*Width*/ 3)); + + SmallVector<StringRef> Parts; + Parts.reserve(PartsHolder.size()); + for (const auto &Part : PartsHolder) + Parts.push_back(Part); + + return getHostCPUNameForARMFromComponents( + "0x" + utohexstr(MIDR_EL1{PrimaryCpuInfo}.Components.Implementer, + /*LowerCase*/ true, + /*Width*/ 2), + /*Hardware*/ "", + "0x" + utohexstr(MIDR_EL1{PrimaryCpuInfo}.Components.Partnum, + /*LowerCase*/ true, + /*Width*/ 3), + Parts, [=]() { return MIDR_EL1{PrimaryCpuInfo}.Components.Variant; }); +}
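The MIDR_EL1 union above decodes the register with bitfields; an equivalent shift-and-mask form makes the field offsets explicit. A standalone sketch (field layout as in the Arm MIDR_EL1 definition):

    #include <cstdint>

    // MIDR_EL1 layout: Implementer[31:24] Variant[23:20] Architecture[19:16]
    // PartNum[15:4] Revision[3:0]. Shift-and-mask equivalent of the union in
    // the patch above.
    struct Midr {
      unsigned Implementer, Variant, PartNum, Revision;
    };

    Midr decodeMidr(uint64_t Raw) {
      return {static_cast<unsigned>((Raw >> 24) & 0xff),
              static_cast<unsigned>((Raw >> 20) & 0xf),
              static_cast<unsigned>((Raw >> 4) & 0xfff),
              static_cast<unsigned>(Raw & 0xf)};
    }

+ namespace { StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) { switch (Id) { @@ -1450,6 +1503,75 @@ StringRef sys::getHostCPUName() { return "generic"; } +#elif defined(_M_ARM64) || defined(_M_ARM64EC) + +StringRef sys::getHostCPUName() { + constexpr char CentralProcessorKeyName[] = + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor"; + // Sub key names are simple numbers ("0", "1", etc.)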
so 10 chars should be + // enough for the slash and name. + constexpr size_t SubKeyNameMaxSize = ARRAYSIZE(CentralProcessorKeyName) + 10; + + SmallVector<uint64_t> Values; + uint64_t PrimaryCpuInfo; + char PrimaryPartKeyName[SubKeyNameMaxSize]; + DWORD PrimaryPartKeyNameSize = 0; + HKEY CentralProcessorKey; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, CentralProcessorKeyName, 0, KEY_READ, + &CentralProcessorKey) == ERROR_SUCCESS) { + for (unsigned Index = 0; Index < UINT32_MAX; ++Index) { + char SubKeyName[SubKeyNameMaxSize]; + DWORD SubKeySize = SubKeyNameMaxSize; + HKEY SubKey; + if ((RegEnumKeyExA(CentralProcessorKey, Index, SubKeyName, &SubKeySize, + nullptr, nullptr, nullptr, + nullptr) == ERROR_SUCCESS) && + (RegOpenKeyExA(CentralProcessorKey, SubKeyName, 0, KEY_READ, + &SubKey) == ERROR_SUCCESS)) { + // The "CP 4000" registry key contains a cached copy of the MIDR_EL1 + // register. + uint64_t RegValue; + DWORD ActualType; + DWORD RegValueSize = sizeof(RegValue); + if ((RegQueryValueExA(SubKey, "CP 4000", nullptr, &ActualType, + (PBYTE)&RegValue, + &RegValueSize) == ERROR_SUCCESS) && + (ActualType == REG_QWORD) && RegValueSize == sizeof(RegValue)) { + // Assume that the part with the "highest" reg key name is the primary + // part (to match the way that Linux's cpuinfo is written). Win32 + // makes no guarantees about the order of sub keys, so we have to + // compare the names. + if (PrimaryPartKeyNameSize < SubKeySize || + (PrimaryPartKeyNameSize == SubKeySize && + ::memcmp(SubKeyName, PrimaryPartKeyName, SubKeySize) > 0)) { + PrimaryCpuInfo = RegValue; + ::memcpy(PrimaryPartKeyName, SubKeyName, SubKeySize + 1); + PrimaryPartKeyNameSize = SubKeySize; + } + if (!llvm::is_contained(Values, RegValue)) { + Values.push_back(RegValue); + } + } + RegCloseKey(SubKey); + } else { + // No more sub keys. + break; + } + } + RegCloseKey(CentralProcessorKey); + } + + if (Values.empty()) { + return "generic"; + } + + // Win32 makes no guarantees about the order of sub keys, so sort to ensure + // reproducibility. 
+ llvm::sort(Values); + + return detail::getHostCPUNameForARM(PrimaryCpuInfo, Values); +} + #elif defined(__APPLE__) && defined(__powerpc__) StringRef sys::getHostCPUName() { host_basic_info_data_t hostInfo; diff --git a/llvm/lib/TextAPI/Architecture.cpp b/llvm/lib/TextAPI/Architecture.cpp index 51ca91d..3b53067 100644 --- a/llvm/lib/TextAPI/Architecture.cpp +++ b/llvm/lib/TextAPI/Architecture.cpp @@ -21,7 +21,7 @@ namespace llvm { namespace MachO { Architecture getArchitectureFromCpuType(uint32_t CPUType, uint32_t CPUSubType) { -#define ARCHINFO(Arch, Type, Subtype, NumBits) \ +#define ARCHINFO(Arch, Name, Type, Subtype, NumBits) \ if (CPUType == (Type) && \ (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) == (Subtype)) \ return AK_##Arch; @@ -33,7 +33,7 @@ Architecture getArchitectureFromCpuType(uint32_t CPUType, uint32_t CPUSubType) { Architecture getArchitectureFromName(StringRef Name) { return StringSwitch<Architecture>(Name) -#define ARCHINFO(Arch, Type, Subtype, NumBits) .Case(#Arch, AK_##Arch) +#define ARCHINFO(Arch, Name, Type, Subtype, NumBits) .Case(#Name, AK_##Arch) #include "llvm/TextAPI/Architecture.def" #undef ARCHINFO .Default(AK_unknown); @@ -41,9 +41,9 @@ Architecture getArchitectureFromName(StringRef Name) { StringRef getArchitectureName(Architecture Arch) { switch (Arch) { -#define ARCHINFO(Arch, Type, Subtype, NumBits) \ +#define ARCHINFO(Arch, Name, Type, Subtype, NumBits) \ case AK_##Arch: \ - return #Arch; + return #Name; #include "llvm/TextAPI/Architecture.def" #undef ARCHINFO case AK_unknown: @@ -57,7 +57,7 @@ StringRef getArchitectureName(Architecture Arch) { std::pair<uint32_t, uint32_t> getCPUTypeFromArchitecture(Architecture Arch) { switch (Arch) { -#define ARCHINFO(Arch, Type, Subtype, NumBits) \ +#define ARCHINFO(Arch, Name, Type, Subtype, NumBits) \ case AK_##Arch: \ return std::make_pair(Type, Subtype); #include "llvm/TextAPI/Architecture.def" @@ -77,7 +77,7 @@ Architecture mapToArchitecture(const Triple &Target) { bool is64Bit(Architecture Arch) { switch (Arch) { -#define ARCHINFO(Arch, Type, Subtype, NumBits) \ +#define ARCHINFO(Arch, Name, Type, Subtype, NumBits) \ case AK_##Arch: \ return NumBits == 64; #include "llvm/TextAPI/Architecture.def" diff --git a/llvm/lib/TextAPI/TextStubCommon.cpp b/llvm/lib/TextAPI/TextStubCommon.cpp index 0b710b0..7bf1f9a 100644 --- a/llvm/lib/TextAPI/TextStubCommon.cpp +++ b/llvm/lib/TextAPI/TextStubCommon.cpp @@ -133,7 +133,7 @@ QuotingType ScalarTraits<PlatformSet>::mustQuote(StringRef) { void ScalarBitSetTraits<ArchitectureSet>::bitset(IO &IO, ArchitectureSet &Archs) { -#define ARCHINFO(arch, type, subtype, numbits) \ +#define ARCHINFO(arch, name, type, subtype, numbits) \ IO.bitSetCase(Archs, #arch, 1U << static_cast<int>(AK_##arch)); #include "llvm/TextAPI/Architecture.def" #undef ARCHINFO
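The ARCHINFO edits above are the usual maintenance of an X-macro table: Architecture.def now supplies a Name column, and each consumer redefines ARCHINFO for its own purpose before including the .def again. The technique in miniature (a toy table, not the real Architecture.def):

    // Toy X-macro table; the real list lives in llvm/TextAPI/Architecture.def.
    #define ARCH_TABLE(X)                                                      \
      X(i386, "i386", 32)                                                      \
      X(x86_64, "x86_64", 64)

    enum Architecture {
    #define ARCHINFO(Arch, Name, NumBits) AK_##Arch,
      ARCH_TABLE(ARCHINFO)
    #undef ARCHINFO
      AK_unknown
    };

    // Each consumer re-expands the table for its own purpose.
    const char *getArchitectureName(Architecture A) {
      switch (A) {
    #define ARCHINFO(Arch, Name, NumBits)                                      \
      case AK_##Arch:                                                          \
        return Name;
        ARCH_TABLE(ARCHINFO)
    #undef ARCHINFO
      default:
        return "unknown";
      }
    }

diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 5485998..0d48a35 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -80,6 +80,10 @@ static cl::opt<bool> ClCompoundReadBeforeWrite( "tsan-compound-read-before-write", cl::init(false), cl::desc("Emit special compound instrumentation for reads-before-writes"), cl::Hidden); +static cl::opt<bool> + ClOmitNonCaptured("tsan-omit-by-pointer-capturing", cl::init(true), + cl::desc("Omit accesses due to pointer capturing"), + cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites,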
"Number of instrumented writes"); @@ -450,7 +454,8 @@ void ThreadSanitizer::chooseInstructionsToInstrument( const AllocaInst *AI = findAllocaForValue(Addr); // Instead of Addr, we should check whether its base pointer is captured. - if (AI && !PointerMayBeCaptured(AI, /*ReturnCaptures=*/true)) { + if (AI && !PointerMayBeCaptured(AI, /*ReturnCaptures=*/true) && + ClOmitNonCaptured) { // The variable is addressable but not captured, so it cannot be // referenced from a different thread and participate in a data race // (see llvm/Analysis/CaptureTracking.h for details). diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp index d67192f..0ffea3f 100644 --- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp +++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp @@ -26,6 +26,18 @@ using namespace llvm; static cl::opt<int64_t> DefaultFunctionEntryCount("profcheck-default-function-entry-count", cl::init(1000)); +static cl::opt<bool> + AnnotateSelect("profcheck-annotate-select", cl::init(true), + cl::desc("Also inject (if missing) and verify MD_prof for " + "`select` instructions")); +static cl::opt<uint32_t> SelectTrueWeight( + "profcheck-default-select-true-weight", cl::init(2U), + cl::desc("When annotating `select` instructions, this value will be used " + "for the first ('true') case.")); +static cl::opt<uint32_t> SelectFalseWeight( + "profcheck-default-select-false-weight", cl::init(3U), + cl::desc("When annotating `select` instructions, this value will be used " + "for the second ('false') case.")); namespace { class ProfileInjector { Function &F; @@ -82,6 +94,13 @@ bool ProfileInjector::inject() { return false; bool Changed = false; for (auto &BB : F) { + if (AnnotateSelect) { + for (auto &I : BB) { + if (isa<SelectInst>(I) && !I.getMetadata(LLVMContext::MD_prof)) + setBranchWeights(I, {SelectTrueWeight, SelectFalseWeight}, + /*IsExpected=*/false); + } + } auto *Term = getTerminatorBenefitingFromMDProf(BB); if (!Term || Term->getMetadata(LLVMContext::MD_prof)) continue; @@ -144,12 +163,18 @@ PreservedAnalyses ProfileVerifierPass::run(Function &F, } if (EntryCount->getCount() == 0) return PreservedAnalyses::all(); - for (const auto &BB : F) + for (const auto &BB : F) { + if (AnnotateSelect) { + for (const auto &I : BB) + if (isa<SelectInst>(I) && !I.getMetadata(LLVMContext::MD_prof)) + F.getContext().emitError( + "Profile verification failed: select annotation missing"); + } if (const auto *Term = ProfileInjector::getTerminatorBenefitingFromMDProf(BB)) if (!Term->getMetadata(LLVMContext::MD_prof)) F.getContext().emitError( "Profile verification failed: branch annotation missing"); - + } return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a52aa84..9667b50 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8301,7 +8301,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, VPRecipeBase *Recipe; Instruction *Instr = R->getUnderlyingInstr(); SmallVector<VPValue *, 4> Operands(R->operands()); - if (auto *PhiR = dyn_cast<VPWidenPHIRecipe>(R)) { + if (auto *PhiR = dyn_cast<VPPhi>(R)) { VPBasicBlock *Parent = PhiR->getParent(); [[maybe_unused]] VPRegionBlock *LoopRegionOf = Parent->getEnclosingLoopRegion(); @@ -8339,6 +8339,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, PhiRecipe->addOperand(Operands[1]); return PhiRecipe; } + 
   assert(!R->isPhi() && "only VPPhi nodes expected at this point");
   if (isa<TruncInst>(Instr) &&
       (Recipe = tryToOptimizeInductionTruncate(
            cast<TruncInst>(Instr), Operands, Range)))
@@ -10321,8 +10322,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
       // TODO: Move to general VPlan pipeline once epilogue loops are also
       // supported.
-      VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount,
-                               BestPlan, VF.Width, IC, PSE);
+      VPlanTransforms::runPass(
+          VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
+          VF.Width, IC, PSE);
 
       LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
 
@@ -10393,8 +10395,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                                Checks, BestPlan);
       // TODO: Move to general VPlan pipeline once epilogue loops are also
       // supported.
-      VPlanTransforms::runPass(VPlanTransforms::materializeVectorTripCount,
-                               BestPlan, VF.Width, IC, PSE);
+      VPlanTransforms::runPass(
+          VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
+          VF.Width, IC, PSE);
       LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
       ++LoopsVectorized;
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 62ab3f52..5d0e2f9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15097,7 +15097,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals,
   for (ExternalUser &EU : ExternalUses) {
     LLVM_DEBUG(dbgs() << "SLP: Computing cost for external use of TreeEntry "
                       << EU.E.Idx << " in lane " << EU.Lane << "\n");
-    LLVM_DEBUG(dbgs() << " User:" << *EU.User << "\n");
+    LLVM_DEBUG(if (EU.User) dbgs() << " User:" << *EU.User << "\n";
+               else dbgs() << " User: nullptr\n");
     LLVM_DEBUG(dbgs() << " Use: " << EU.Scalar->getNameOrAsOperand() << "\n");
 
     // Uses by ephemeral values are free (because the ephemeral value will be
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8dfb982..c42cdd5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1242,12 +1242,24 @@ struct LLVM_ABI_FOR_TEST VPPhi : public VPInstruction, public VPPhiAccessors {
       : VPInstruction(Instruction::PHI, Operands, DL, Name) {}
 
   static inline bool classof(const VPUser *U) {
-    auto *R = dyn_cast<VPInstruction>(U);
-    return R && R->getOpcode() == Instruction::PHI;
+    auto *VPI = dyn_cast<VPInstruction>(U);
+    return VPI && VPI->getOpcode() == Instruction::PHI;
+  }
+
+  static inline bool classof(const VPValue *V) {
+    auto *VPI = dyn_cast<VPInstruction>(V);
+    return VPI && VPI->getOpcode() == Instruction::PHI;
+  }
+
+  static inline bool classof(const VPSingleDefRecipe *SDR) {
+    auto *VPI = dyn_cast<VPInstruction>(SDR);
+    return VPI && VPI->getOpcode() == Instruction::PHI;
   }
 
   VPPhi *clone() override {
-    return new VPPhi(operands(), getDebugLoc(), getName());
+    auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
+    PhiR->setUnderlyingValue(getUnderlyingValue());
+    return PhiR;
   }
 
   void execute(VPTransformState &State) override;
@@ -1279,7 +1291,7 @@ public:
 
   /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
   /// VPIRInstruction.
-  static VPIRInstruction *create(Instruction &I);
+  LLVM_ABI_FOR_TEST static VPIRInstruction *create(Instruction &I);
 
   VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
 
@@ -1293,8 +1305,8 @@ public:
   void execute(VPTransformState &State) override;
 
   /// Return the cost of this VPIRInstruction.
-  InstructionCost computeCost(ElementCount VF,
-                              VPCostContext &Ctx) const override;
+  LLVM_ABI_FOR_TEST InstructionCost
+  computeCost(ElementCount VF, VPCostContext &Ctx) const override;
 
   Instruction &getInstruction() const { return I; }
 
@@ -1332,7 +1344,8 @@ public:
 /// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
 /// allowed, and it is used to add a new incoming value for the single
 /// predecessor VPBB.
-struct VPIRPhi : public VPIRInstruction, public VPPhiAccessors {
+struct LLVM_ABI_FOR_TEST VPIRPhi : public VPIRInstruction,
+                                   public VPPhiAccessors {
   VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {}
 
   static inline bool classof(const VPRecipeBase *U) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 1b91901..7e8eff31 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -91,17 +91,15 @@ void PlainCFGBuilder::fixHeaderPhis() {
   for (auto *Phi : PhisToFix) {
     assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
     VPValue *VPVal = IRDef2VPValue[Phi];
-    assert(isa<VPWidenPHIRecipe>(VPVal) &&
-           "Expected WidenPHIRecipe for phi node.");
-    auto *VPPhi = cast<VPWidenPHIRecipe>(VPVal);
-    assert(VPPhi->getNumOperands() == 0 &&
-           "Expected VPInstruction with no operands.");
+    assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
+    auto *PhiR = cast<VPPhi>(VPVal);
+    assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
     assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
            "Expected Phi in header block.");
     assert(Phi->getNumOperands() == 2 &&
            "header phi must have exactly 2 operands");
     for (BasicBlock *Pred : predecessors(Phi->getParent()))
-      VPPhi->addOperand(
+      PhiR->addOperand(
           getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
   }
 }
@@ -204,11 +202,11 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
 
     VPSingleDefRecipe *NewR;
     if (auto *Phi = dyn_cast<PHINode>(Inst)) {
-      // Phi node's operands may have not been visited at this point. We create
+      // Phi node's operands may not have been visited at this point. We create
      // an empty VPInstruction that we will fix once the whole plain CFG has
       // been built.
-      NewR = new VPWidenPHIRecipe(Phi, nullptr, Phi->getDebugLoc(), "vec.phi");
-      VPBB->appendRecipe(NewR);
+      NewR = VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi");
+      NewR->setUnderlyingValue(Phi);
       if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
         // Header phis need to be fixed after the VPBB for the latch has been
         // created.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index 3b3bbc3..862b930 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -227,10 +227,10 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
 }
 
 void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
-  SmallVector<VPWidenPHIRecipe *> Phis;
+  SmallVector<VPPhi *> Phis;
   for (VPRecipeBase &R : VPBB->phis())
-    Phis.push_back(cast<VPWidenPHIRecipe>(&R));
-  for (VPWidenPHIRecipe *PhiR : Phis) {
+    Phis.push_back(cast<VPPhi>(&R));
+  for (VPPhi *PhiR : Phis) {
     // The non-header Phi is converted into a Blend recipe below,
     // so we don't have to worry about the insertion order and we can just use
     // the builder. At this point we generate the predication tree. There may
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a7965a0..1a71a75 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -63,17 +63,20 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
     Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
 
     VPRecipeBase *NewRecipe = nullptr;
-    if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
-      auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
+    if (auto *PhiR = dyn_cast<VPPhi>(&Ingredient)) {
+      auto *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
       const auto *II = GetIntOrFpInductionDescriptor(Phi);
-      if (!II)
-        continue;
-
-      VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
-      VPValue *Step =
-          vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
-      NewRecipe = new VPWidenIntOrFpInductionRecipe(
-          Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
+      if (!II) {
+        NewRecipe = new VPWidenPHIRecipe(Phi, nullptr, PhiR->getDebugLoc());
+        for (VPValue *Op : PhiR->operands())
+          NewRecipe->addOperand(Op);
+      } else {
+        VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
+        VPValue *Step =
+            vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
+        NewRecipe = new VPWidenIntOrFpInductionRecipe(
+            Phi, Start, Step, &Plan->getVF(), *II, Ingredient.getDebugLoc());
+      }
     } else {
       assert(isa<VPInstruction>(&Ingredient) &&
              "only VPInstructions expected here");
@@ -3222,7 +3225,7 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
   }
 }
 
-void VPlanTransforms::materializeVectorTripCount(
+void VPlanTransforms::materializeConstantVectorTripCount(
     VPlan &Plan, ElementCount BestVF, unsigned BestUF,
     PredicatedScalarEvolution &PSE) {
   assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
@@ -3230,19 +3233,26 @@
   VPValue *TC = Plan.getTripCount();
   // Skip cases for which the trip count may be non-trivial to materialize.
+  // I.e., when a scalar tail is absent - due to tail folding, or when a scalar
+  // tail is required.
   if (!Plan.hasScalarTail() ||
       Plan.getMiddleBlock()->getSingleSuccessor() ==
           Plan.getScalarPreheader() ||
       !TC->isLiveIn())
     return;
+
   // Materialize vector trip counts for constants early if it can simply
   // be computed as (Original TC / VF * UF) * VF * UF.
+  // TODO: Compute vector trip counts for loops requiring a scalar epilogue and
+  // tail-folded loops.
   ScalarEvolution &SE = *PSE.getSE();
   auto *TCScev = SE.getSCEV(TC->getLiveInIRValue());
+  if (!isa<SCEVConstant>(TCScev))
+    return;
   const SCEV *VFxUF = SE.getElementCount(TCScev->getType(), BestVF * BestUF);
   auto VecTCScev = SE.getMulExpr(SE.getUDivExpr(TCScev, VFxUF), VFxUF);
-  if (auto *NewC = dyn_cast<SCEVConstant>(VecTCScev))
-    Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
+  if (auto *ConstVecTC = dyn_cast<SCEVConstant>(VecTCScev))
+    Plan.getVectorTripCount().setUnderlyingValue(ConstVecTC->getValue());
 }
 
 void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5943684..ecaca72 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -252,9 +252,10 @@ struct VPlanTransforms {
   // Materialize vector trip counts for constants early if it can simply be
   // computed as (Original TC / VF * UF) * VF * UF.
-  static void materializeVectorTripCount(VPlan &Plan, ElementCount BestVF,
-                                         unsigned BestUF,
-                                         PredicatedScalarEvolution &PSE);
+  static void
+  materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF,
+                                     unsigned BestUF,
+                                     PredicatedScalarEvolution &PSE);
 
   /// Materialize the backedge-taken count to be computed explicitly using
   /// VPInstructions.
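A note on the trip-count algebra behind materializeConstantVectorTripCount: the SCEV expression built above, udiv(TC, VF * UF) * (VF * UF), rounds the original trip count down to the nearest multiple of VF * UF; the leftover iterations run in the scalar epilogue, which is why the transform bails when no scalar tail exists. The following is a minimal standalone sketch of that arithmetic in plain C++, not the LLVM SCEV API; the variable names and example numbers are made up for illustration.

// Standalone sketch: constant vector trip count as udiv(TC, VF*UF) * (VF*UF).
// All names and values here are illustrative, not LLVM code.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t TC = 1003; // original scalar trip count (a SCEVConstant)
  const uint64_t VF = 4;    // vectorization factor
  const uint64_t UF = 2;    // unroll (interleave) factor

  const uint64_t VFxUF = VF * UF;
  // Round TC down to a multiple of VF*UF; this is the vector trip count.
  const uint64_t VecTC = (TC / VFxUF) * VFxUF;

  // Prints "VecTC = 1000, scalar tail = 3": 125 vector iterations cover
  // 1000 scalar iterations, and the remaining 3 run in the scalar epilogue.
  std::printf("VecTC = %llu, scalar tail = %llu\n",
              (unsigned long long)VecTC, (unsigned long long)(TC - VecTC));
  return 0;
}

Because both operands are constants here, the rounded value folds to a SCEVConstant and can be attached to the plan's vector trip count up front; for a non-constant TC the division would have to be emitted as runtime instructions instead, which is the case the early isa<SCEVConstant> check now skips.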