diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC')
38 files changed, 1291 insertions, 1109 deletions
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 58766b1..1fc475d 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -1756,7 +1756,7 @@ bool PPCAsmParser::parseDirectiveLocalEntry(SMLoc L) { if (getParser().parseIdentifier(Name)) return Error(L, "expected identifier in '.localentry' directive"); - MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name)); + auto *Sym = static_cast<MCSymbolELF *>(getContext().getOrCreateSymbol(Name)); const MCExpr *Expr; if (parseToken(AsmToken::Comma) || diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 3808a26..2182039 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -25,7 +25,6 @@ add_llvm_target(PowerPCCodeGen PPCBranchSelector.cpp PPCBranchCoalescing.cpp PPCCallingConv.cpp - PPCCCState.cpp PPCCTRLoops.cpp PPCCTRLoopsVerify.cpp PPCExpandAtomicPseudoInsts.cpp @@ -50,7 +49,7 @@ add_llvm_target(PowerPCCodeGen PPCTargetTransformInfo.cpp PPCTOCRegDeps.cpp PPCTLSDynamicCall.cpp - PPCVSXCopy.cpp + PPCVSXWACCCopy.cpp PPCReduceCRLogicals.cpp PPCVSXFMAMutate.cpp PPCVSXSwapRemoval.cpp diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 71a7614..5e27f06 100644 --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -8,6 +8,7 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "TargetInfo/PowerPCTargetInfo.h" +#include "llvm/MC/MCDecoder.h" #include "llvm/MC/MCDecoderOps.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index ec97e2e..04b886a 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -13,6 +13,7 @@ #include "llvm/BinaryFormat/MachO.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" @@ -112,14 +113,15 @@ public: // to resolve the fixup directly. Emit a relocation and leave // resolution of the final target address to the linker. if (const auto *A = Target.getAddSym()) { - if (const auto *S = dyn_cast<MCSymbolELF>(A)) { + if (getContext().isELF()) { // The "other" values are stored in the last 6 bits of the second // byte. The traditional defines for STO values assume the full byte // and thus the shift to pack it. - unsigned Other = S->getOther() << 2; + unsigned Other = static_cast<const MCSymbolELF *>(A)->getOther() << 2; if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0) return true; - } else if (const auto *S = dyn_cast<MCSymbolXCOFF>(A)) { + } else if (getContext().isXCOFF()) { + auto *S = static_cast<const MCSymbolXCOFF *>(A); return !Target.isAbsolute() && S->isExternal() && S->getStorageClass() == XCOFF::C_WEAKEXT; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index a5d3be4..329ad6e 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -86,8 +86,8 @@ unsigned PPCELFObjectWriter::getRelocType(const MCFixup &Fixup, case PPC::S_TPREL_HIGHEST: case PPC::S_TPREL_HIGHESTA: case PPC::S_TPREL_LO: - if (auto *SA = Target.getAddSym()) - cast<MCSymbolELF>(SA)->setType(ELF::STT_TLS); + if (auto *SA = const_cast<MCSymbol *>(Target.getAddSym())) + static_cast<MCSymbolELF *>(SA)->setType(ELF::STT_TLS); break; default: break; @@ -499,7 +499,8 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCValue &V, // The "other" values are stored in the last 6 bits of the second byte. // The traditional defines for STO values assume the full byte and thus // the shift to pack it. - unsigned Other = cast<MCSymbolELF>(V.getAddSym())->getOther() << 2; + unsigned Other = + static_cast<const MCSymbolELF *>(V.getAddSym())->getOther() << 2; return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index 2dbc31f..132d5a4 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -65,7 +65,7 @@ void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst, MCFragment *InstructionFragment = getCurrentFragment(); SMLoc InstLoc = Inst.getLoc(); // Check if there was a last label emitted. - if (LastLabel && !LastLabel->isUnset() && LastLabelLoc.isValid() && + if (LastLabel && LastLabel->isDefined() && LastLabelLoc.isValid() && InstLoc.isValid()) { const SourceMgr *SourceManager = getContext().getSourceManager(); unsigned InstLine = SourceManager->FindLineNumber(InstLoc); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 3dad0e8..d856c3f 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -211,7 +211,7 @@ public: : PPCTargetStreamer(S), OS(OS) {} void emitTCEntry(const MCSymbol &S, PPCMCExpr::Specifier Kind) override { - if (const MCSymbolXCOFF *XSym = dyn_cast<MCSymbolXCOFF>(&S)) { + if (getContext().isXCOFF()) { MCSymbolXCOFF *TCSym = static_cast<const MCSectionXCOFF *>(Streamer.getCurrentSectionOnly()) ->getQualNameSymbol(); @@ -225,10 +225,10 @@ public: if (Kind == PPC::S_AIX_TLSGD || Kind == PPC::S_AIX_TLSGDM || Kind == PPC::S_AIX_TLSIE || Kind == PPC::S_AIX_TLSLE || Kind == PPC::S_AIX_TLSLD || Kind == PPC::S_AIX_TLSML) - OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@" + OS << "\t.tc " << TCSym->getName() << "," << S.getName() << "@" << getContext().getAsmInfo()->getSpecifierName(Kind) << '\n'; else - OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << '\n'; + OS << "\t.tc " << TCSym->getName() << "," << S.getName() << '\n'; if (TCSym->hasRename()) Streamer.emitXCOFFRenameDirective(TCSym, TCSym->getSymbolTableName()); @@ -308,7 +308,7 @@ public: } void emitAssignment(MCSymbol *S, const MCExpr *Value) override { - auto *Symbol = cast<MCSymbolELF>(S); + auto *Symbol = static_cast<MCSymbolELF *>(S); // When encoding an assignment to set symbol A to symbol B, also copy // the st_other bits encoding the local entry point offset. @@ -335,7 +335,7 @@ private: auto *Ref = dyn_cast<const MCSymbolRefExpr>(S); if (!Ref) return false; - const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol()); + auto &RhsSym = static_cast<const MCSymbolELF &>(Ref->getSymbol()); unsigned Other = D->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK; diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 124dac4..a7cd5cd 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -39,7 +39,7 @@ class ModulePass; FunctionPass *createPPCLoopInstrFormPrepPass(PPCTargetMachine &TM); FunctionPass *createPPCTOCRegDepsPass(); FunctionPass *createPPCEarlyReturnPass(); - FunctionPass *createPPCVSXCopyPass(); + FunctionPass *createPPCVSXWACCCopyPass(); FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCVSXSwapRemovalPass(); FunctionPass *createPPCReduceCRLogicalsPass(); @@ -64,7 +64,7 @@ class ModulePass; void initializePPCLoopInstrFormPrepPass(PassRegistry&); void initializePPCTOCRegDepsPass(PassRegistry&); void initializePPCEarlyReturnPass(PassRegistry&); - void initializePPCVSXCopyPass(PassRegistry&); + void initializePPCVSXWACCCopyPass(PassRegistry &); void initializePPCVSXFMAMutatePass(PassRegistry&); void initializePPCVSXSwapRemovalPass(PassRegistry&); void initializePPCReduceCRLogicalsPass(PassRegistry&); diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index ce1d51a..2ab2c14 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2155,7 +2155,8 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() { PPCTargetStreamer *TS = static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); - TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), LocalOffsetExp); + TS->emitLocalEntry(static_cast<MCSymbolELF *>(CurrentFnSym), + LocalOffsetExp); } else if (Subtarget->isUsingPCRelativeCalls()) { // When generating the entry point for a function we have a few scenarios // based on whether or not that function uses R2 and whether or not that @@ -2182,7 +2183,7 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() { MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) { PPCTargetStreamer *TS = static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); - TS->emitLocalEntry(cast<MCSymbolELF>(CurrentFnSym), + TS->emitLocalEntry(static_cast<MCSymbolELF *>(CurrentFnSym), MCConstantExpr::create(1, OutContext)); } } @@ -2766,7 +2767,7 @@ void PPCAIXAsmPrinter::emitGlobalVariableHelper(const GlobalVariable *GV) { if (GV->hasComdat()) report_fatal_error("COMDAT not yet supported by AIX."); - MCSymbolXCOFF *GVSym = cast<MCSymbolXCOFF>(getSymbol(GV)); + auto *GVSym = static_cast<MCSymbolXCOFF *>(getSymbol(GV)); if (GV->isDeclarationForLinker()) { emitLinkage(GV, GVSym); @@ -2859,7 +2860,7 @@ void PPCAIXAsmPrinter::emitFunctionDescriptor() { MCSectionSubPair Current = OutStreamer->getCurrentSection(); // Emit function descriptor. OutStreamer->switchSection( - cast<MCSymbolXCOFF>(CurrentFnDescSym)->getRepresentedCsect()); + static_cast<MCSymbolXCOFF *>(CurrentFnDescSym)->getRepresentedCsect()); // Emit aliasing label for function descriptor csect. for (const GlobalAlias *Alias : GOAliasMap[&MF->getFunction()]) @@ -2994,7 +2995,8 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { SmallString<128> Name; StringRef Prefix = "."; Name += Prefix; - Name += cast<MCSymbolXCOFF>(I.first.first)->getSymbolTableName(); + Name += static_cast<const MCSymbolXCOFF *>(I.first.first) + ->getSymbolTableName(); MCSymbol *S = OutContext.getOrCreateSymbol(Name); TCEntry = static_cast<MCSectionXCOFF *>( getObjFileLowering().getSectionForTOCEntry(S, TM)); @@ -3112,7 +3114,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) { setCsectAlignment(&G); std::optional<CodeModel::Model> OptionalCodeModel = G.getCodeModel(); if (OptionalCodeModel) - setOptionalCodeModel(cast<MCSymbolXCOFF>(getSymbol(&G)), + setOptionalCodeModel(static_cast<MCSymbolXCOFF *>(getSymbol(&G)), *OptionalCodeModel); } @@ -3139,7 +3141,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) { if (GVar) { std::optional<CodeModel::Model> OptionalCodeModel = GVar->getCodeModel(); if (OptionalCodeModel) - setOptionalCodeModel(cast<MCSymbolXCOFF>(getSymbol(&Alias)), + setOptionalCodeModel(static_cast<MCSymbolXCOFF *>(getSymbol(&Alias)), *OptionalCodeModel); } @@ -3190,8 +3192,8 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) { case PPC::BL_NOP: { const MachineOperand &MO = MI->getOperand(0); if (MO.isSymbol()) { - MCSymbolXCOFF *S = - cast<MCSymbolXCOFF>(OutContext.getOrCreateSymbol(MO.getSymbolName())); + auto *S = static_cast<MCSymbolXCOFF *>( + OutContext.getOrCreateSymbol(MO.getSymbolName())); ExtSymSDNodeSymbols.insert(S); } } break; diff --git a/llvm/lib/Target/PowerPC/PPCCCState.cpp b/llvm/lib/Target/PowerPC/PPCCCState.cpp deleted file mode 100644 index e08aaf8..0000000 --- a/llvm/lib/Target/PowerPC/PPCCCState.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===---- PPCCCState.cpp - CCState with PowerPC specific extensions ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PPCCCState.h" -using namespace llvm; - -// Identify lowered values that originated from ppcf128 arguments and record -// this. -void PPCCCState::PreAnalyzeCallOperands( - const SmallVectorImpl<ISD::OutputArg> &Outs) { - for (const auto &I : Outs) { - if (I.ArgVT == llvm::MVT::ppcf128) - OriginalArgWasPPCF128.push_back(true); - else - OriginalArgWasPPCF128.push_back(false); - } -} - -void PPCCCState::PreAnalyzeFormalArguments( - const SmallVectorImpl<ISD::InputArg> &Ins) { - for (const auto &I : Ins) { - if (I.ArgVT == llvm::MVT::ppcf128) { - OriginalArgWasPPCF128.push_back(true); - } else { - OriginalArgWasPPCF128.push_back(false); - } - } -} diff --git a/llvm/lib/Target/PowerPC/PPCCCState.h b/llvm/lib/Target/PowerPC/PPCCCState.h deleted file mode 100644 index b0e50b2..0000000 --- a/llvm/lib/Target/PowerPC/PPCCCState.h +++ /dev/null @@ -1,73 +0,0 @@ -//===---- PPCCCState.h - CCState with PowerPC specific extensions -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef PPCCCSTATE_H -#define PPCCCSTATE_H - -#include "PPCISelLowering.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/CallingConvLower.h" - -namespace llvm { - -class PPCCCState : public CCState { -public: - - void - PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs); - void - PreAnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins); - -private: - - // Records whether the value has been lowered from an ppcf128. - SmallVector<bool, 4> OriginalArgWasPPCF128; - -public: - PPCCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) - : CCState(CC, isVarArg, MF, locs, C) {} - - bool WasOriginalArgPPCF128(unsigned ValNo) { return OriginalArgWasPPCF128[ValNo]; } - void clearWasPPCF128() { OriginalArgWasPPCF128.clear(); } -}; - -class AIXCCState : public CCState { -private: - BitVector IsFixed; - -public: - AIXCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF, - SmallVectorImpl<CCValAssign> &Locs, LLVMContext &C) - : CCState(CC, IsVarArg, MF, Locs, C) {} - - void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, - CCAssignFn Fn) { - // All formal arguments are fixed. - IsFixed.resize(Ins.size(), true); - CCState::AnalyzeFormalArguments(Ins, Fn); - } - - void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, - CCAssignFn Fn) { - // Record whether the call operand was a fixed argument. - IsFixed.resize(Outs.size(), false); - for (unsigned ValNo = 0, E = Outs.size(); ValNo != E; ++ValNo) - if (Outs[ValNo].IsFixed) - IsFixed.set(ValNo); - - CCState::AnalyzeCallOperands(Outs, Fn); - } - - bool isFixed(unsigned ValNo) const { return IsFixed.test(ValNo); } -}; - -} // end namespace llvm - -#endif diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp index 46aa27e..6aa2f0c 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoopsVerify.cpp @@ -18,7 +18,6 @@ #ifndef NDEBUG #include "MCTargetDesc/PPCMCTargetDesc.h" #include "PPC.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist_iterator.h" @@ -93,7 +92,7 @@ static bool clobbersCTR(const MachineInstr &MI) { static bool verifyCTRBranch(MachineBasicBlock *MBB, MachineBasicBlock::iterator I) { MachineBasicBlock::iterator BI = I; - SmallSet<MachineBasicBlock *, 16> Visited; + SmallPtrSet<MachineBasicBlock *, 16> Visited; SmallVector<MachineBasicBlock *, 8> Preds; bool CheckPreds; diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp index b00d70b..332b5e0 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp @@ -8,7 +8,6 @@ #include "PPCCallingConv.h" #include "PPCSubtarget.h" -#include "PPCCCState.h" using namespace llvm; inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &, diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.h b/llvm/lib/Target/PowerPC/PPCCallingConv.h index ab61472..9c47142 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.h +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.h @@ -21,28 +21,29 @@ namespace llvm { bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); bool RetCC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State); + Type *OrigTy, CCState &State); bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); + ISD::ArgFlagsTy ArgFlags, Type *OrigTy, + CCState &State); } // End llvm namespace diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index 899326a..5d4fe06e 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -21,11 +21,9 @@ class CCIfNotSubtarget<string F, CCAction A> F), A>; class CCIfOrigArgWasNotPPCF128<CCAction A> - : CCIf<"!static_cast<PPCCCState *>(&State)->WasOriginalArgPPCF128(ValNo)", - A>; + : CCIf<"!OrigTy->isPPC_FP128Ty()", A>; class CCIfOrigArgWasPPCF128<CCAction A> - : CCIf<"static_cast<PPCCCState *>(&State)->WasOriginalArgPPCF128(ValNo)", - A>; + : CCIf<"OrigTy->isPPC_FP128Ty()", A>; //===----------------------------------------------------------------------===// // Return Value Calling Convention diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index e92e00f..0b68ba1 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -1374,7 +1374,10 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args, unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, Align(8)); - CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); + SmallVector<Type *, 16> ArgTys; + for (Value *Arg : Args) + ArgTys.push_back(Arg->getType()); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, ArgTys, CC_PPC64_ELF_FIS); // Bail out if we can't handle any of the arguments. for (const CCValAssign &VA : ArgLocs) { @@ -1487,7 +1490,7 @@ bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumByte if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + CCInfo.AnalyzeCallResult(RetVT, CLI.RetTy, RetCC_PPC64_ELF_FIS); CCValAssign &VA = RVLocs[0]; assert(RVLocs.size() == 1 && "No support for multi-reg return values!"); assert(VA.isRegLoc() && "Can only return in registers!"); @@ -1573,7 +1576,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) { RetVT != MVT::f64) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context); - CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS); + CCInfo.AnalyzeCallResult(RetVT, RetTy, RetCC_PPC64_ELF_FIS); if (RVLocs.size() > 1) return false; } diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index c0860fc..2ad3ed2 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -2078,8 +2078,10 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, // tail call might not be in the new RestoreBlock, so real branch instruction // won't be generated by emitEpilogue(), because shrink-wrap has chosen new // RestoreBlock. So we handle this case here. - if (MFI.getSavePoint() && MFI.hasTailCall()) { - MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); + if (!MFI.getSavePoints().empty() && MFI.hasTailCall()) { + assert(MFI.getRestorePoints().size() < 2 && + "MFI can't contain multiple restore points!"); + MachineBasicBlock *RestoreBlock = MFI.getRestorePoints().front(); for (MachineBasicBlock &MBB : MF) { if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) createTailCallBranchInstr(MBB); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f179873..fa104e4 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14,7 +14,6 @@ #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" -#include "PPCCCState.h" #include "PPCCallingConv.h" #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" @@ -30,7 +29,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -159,8 +157,6 @@ static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); -static const char AIXSSPCanaryWordName[] = "__ssp_canary_word"; - // A faster local-[exec|dynamic] TLS access sequence (enabled with the // -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS // variables; consistent with the IBM XL compiler, we apply a max size of @@ -1433,7 +1429,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // With 32 condition bits, we don't need to sink (and duplicate) compares // aggressively in CodeGenPrep. if (Subtarget.useCRBits()) { - setHasMultipleConditionRegisters(); setJumpIsExpensive(); } @@ -1698,6 +1693,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::XXPERM: return "PPCISD::XXPERM"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; + case PPCISD::VSRQ: + return "PPCISD::VSRQ"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; @@ -1788,11 +1785,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::PADDI_DTPREL: return "PPCISD::PADDI_DTPREL"; - case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; - case PPCISD::SC: return "PPCISD::SC"; - case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; - case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; - case PPCISD::RFEBB: return "PPCISD::RFEBB"; + case PPCISD::VADD_SPLAT: + return "PPCISD::VADD_SPLAT"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; @@ -2704,7 +2698,7 @@ bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) { if (!isa<ConstantSDNode>(N)) return false; - Imm = (int64_t)cast<ConstantSDNode>(N)->getSExtValue(); + Imm = cast<ConstantSDNode>(N)->getSExtValue(); return isInt<34>(Imm); } bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) { @@ -3926,9 +3920,6 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - if (Subtarget.isAIXABI()) - report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX."); - return Op.getOperand(0); } @@ -3985,9 +3976,6 @@ SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - if (Subtarget.isAIXABI()) - report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX."); - SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -3995,22 +3983,76 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + if (Subtarget.isAIXABI()) { + // On AIX we create a trampoline descriptor by combining the + // entry point and TOC from the global descriptor (FPtr) with the + // nest argument as the environment pointer. + uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4; + MaybeAlign PointerAlign(PointerSize); + auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() + ? (MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant) + : MachineMemOperand::MONone; + + uint64_t TOCPointerOffset = 1 * PointerSize; + uint64_t EnvPointerOffset = 2 * PointerSize; + SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT); + SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT); + + const Value *TrampolineAddr = + cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); + const Function *Func = + cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); + + SDValue OutChains[3]; + + // Copy the entry point address from the global descriptor to the + // trampoline buffer. + SDValue LoadEntryPoint = + DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0), + PointerAlign, MMOFlags); + SDValue EPLoadChain = LoadEntryPoint.getValue(1); + OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp, + MachinePointerInfo(TrampolineAddr, 0)); + + // Copy the TOC pointer from the global descriptor to the trampoline + // buffer. + SDValue TOCFromDescriptorPtr = + DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset); + SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr, + MachinePointerInfo(Func, TOCPointerOffset), + PointerAlign, MMOFlags); + SDValue TrampolineTOCPointer = + DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset); + SDValue TOCLoadChain = TOCReg.getValue(1); + OutChains[1] = + DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer, + MachinePointerInfo(TrampolineAddr, TOCPointerOffset)); + + // Store the nest argument into the environment pointer in the trampoline + // buffer. + SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset); + OutChains[2] = + DAG.getStore(Chain, dl, Nest, EnvPointer, + MachinePointerInfo(TrampolineAddr, EnvPointerOffset)); + + SDValue TokenFactor = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + return TokenFactor; + } + bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - - Entry.Ty = IntPtrTy; - Entry.Node = Trmp; Args.push_back(Entry); - + Args.emplace_back(Trmp, IntPtrTy); // TrampSize == (isPPC64 ? 48 : 40); - Entry.Node = - DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()); - Args.push_back(Entry); - - Entry.Node = FPtr; Args.push_back(Entry); - Entry.Node = Nest; Args.push_back(Entry); + Args.emplace_back( + DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()), + IntPtrTy); + Args.emplace_back(FPtr, IntPtrTy); + Args.emplace_back(Nest, IntPtrTy); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); @@ -4286,17 +4328,13 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrAlign); - if (useSoftFloat()) - CCInfo.PreAnalyzeFormalArguments(Ins); - CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); - CCInfo.clearWasPPCF128(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -5540,8 +5578,8 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) { const TargetMachine &TM = Subtarget.getTargetMachine(); const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering(); - MCSymbolXCOFF *S = - cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM)); + auto *S = + static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM)); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); return DAG.getMCSymbol(S, PtrVT); @@ -6018,13 +6056,11 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( // Assign locations to all of the outgoing arguments. SmallVector<CCValAssign, 16> ArgLocs; - PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), PtrAlign); - if (useSoftFloat()) - CCInfo.PreAnalyzeCallOperands(Outs); if (IsVarArg) { // Handle fixed and variable vector arguments differently. @@ -6037,12 +6073,12 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; - if (Outs[i].IsFixed) { + if (!ArgFlags.isVarArg()) { Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, - CCInfo); + Outs[i].OrigTy, CCInfo); } else { Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, - ArgFlags, CCInfo); + ArgFlags, Outs[i].OrigTy, CCInfo); } if (Result) { @@ -6057,7 +6093,6 @@ SDValue PPCTargetLowering::LowerCall_32SVR4( // All arguments are treated the same. CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } - CCInfo.clearWasPPCF128(); // Assign locations to all of the outgoing aggregate by value arguments. SmallVector<CCValAssign, 16> ByValArgLocs; @@ -6853,8 +6888,7 @@ static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) { static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &S) { - AIXCCState &State = static_cast<AIXCCState &>(S); + Type *OrigTy, CCState &State) { const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>( State.getMachineFunction().getSubtarget()); const bool IsPPC64 = Subtarget.isPPC64(); @@ -6866,9 +6900,6 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, if (ValVT == MVT::f128) report_fatal_error("f128 is unimplemented on AIX."); - if (ArgFlags.isNest()) - report_fatal_error("Nest arguments are unimplemented."); - static const MCPhysReg GPR_32[] = {// 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10}; @@ -6883,6 +6914,14 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32; + if (ArgFlags.isNest()) { + MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11); + if (!EnvReg) + report_fatal_error("More then one nest argument."); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo)); + return false; + } + if (ArgFlags.isByVal()) { const Align ByValAlign(ArgFlags.getNonZeroByValAlign()); if (ByValAlign > StackAlign) @@ -7033,7 +7072,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, // They are passed in VRs if any are available (unlike arguments passed // through ellipses) and shadow GPRs (unlike arguments to non-vaarg // functions) - if (State.isFixed(ValNo)) { + if (!ArgFlags.isVarArg()) { if (MCRegister VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); // Shadow allocate GPRs and stack space even though we pass in a VR. @@ -7221,7 +7260,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); const EVT PtrVT = getPointerTy(MF.getDataLayout()); // Reserve space for the linkage area on the stack. @@ -7568,8 +7607,8 @@ SDValue PPCTargetLowering::LowerCall_AIX( MachineFunction &MF = DAG.getMachineFunction(); SmallVector<CCValAssign, 16> ArgLocs; - AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs, - *DAG.getContext()); + CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs, + *DAG.getContext()); // Reserve space for the linkage save area (LSA) on the stack. // In both PPC32 and PPC64 there are 6 reserved slots in the LSA: @@ -8465,10 +8504,11 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias on big endian. - if (Op.getValueType() == MVT::i32 && !i32Stack) { + if (Op.getValueType() == MVT::i32 && !i32Stack && + !Subtarget.isLittleEndian()) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, dl, FIPtr.getValueType())); - MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4); + MPI = MPI.getWithOffset(4); } RLI.Chain = Chain; @@ -9594,12 +9634,14 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, return false; } -bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) { +bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, + bool IsLittleEndian) { assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector"); BitMask.clearAllBits(); EVT VT = BVN.getValueType(0); - APInt ConstValue(VT.getSizeInBits(), 0); + unsigned VTSize = VT.getSizeInBits(); + APInt ConstValue(VTSize, 0); unsigned EltWidth = VT.getScalarSizeInBits(); @@ -9609,8 +9651,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) { if (!CN) return false; - - ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); + // The elements in a vector register are ordered in reverse byte order + // between little-endian and big-endian modes. + ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), + IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos); BitPos += EltWidth; } @@ -9641,7 +9685,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // we do not convert it to MTVSRBMI. // The xxleqv instruction sets a vector with all ones. // The xxlxor instruction sets a vector with all zeros. - if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0xffff) { + if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) && + BitMask != 0 && BitMask != 0xffff) { SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32); MachineSDNode *MSDNode = DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant); @@ -11231,6 +11276,24 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getMergeValues(RetOps, dl); } + case Intrinsic::ppc_mma_build_dmr: { + SmallVector<SDValue, 8> Pairs; + SmallVector<SDValue, 8> Chains; + for (int i = 1; i < 9; i += 2) { + SDValue Hi = Op.getOperand(i); + SDValue Lo = Op.getOperand(i + 1); + if (Hi->getOpcode() == ISD::LOAD) + Chains.push_back(Hi.getValue(1)); + if (Lo->getOpcode() == ISD::LOAD) + Chains.push_back(Lo.getValue(1)); + Pairs.push_back( + DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo})); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG); + return DAG.getMergeValues({Value, TF}, dl); + } + case Intrinsic::ppc_mma_dmxxextfdmr512: { assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future"); auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); @@ -11567,6 +11630,10 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, Op.getOperand(0)), 0); } + case Intrinsic::ppc_mma_disassemble_dmr: { + return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2), + Op.getOperand(ArgStart + 1), MachinePointerInfo()); + } default: break; } @@ -12056,6 +12123,24 @@ SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op, return DAG.getMergeValues({DmrPValue, TF}, dl); } +SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs, + const SDLoc &dl, + SelectionDAG &DAG) const { + SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0], + Pairs[1]), + 0); + SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32); + SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1, + Pairs[2], Pairs[3]), + 0); + SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32); + SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32); + + return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, + {RC, Lo, LoSub, Hi, HiSub}), + 0); +} + SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -14761,9 +14846,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { SDValue Chain = LD->getChain(); EVT VT = LD->getMemoryVT(); - SmallSet<SDNode *, 16> LoadRoots; + SmallPtrSet<SDNode *, 16> LoadRoots; SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); - SmallSet<SDNode *, 16> Visited; + SmallPtrSet<SDNode *, 16> Visited; // First, search up the chain, branching to follow all token-factor operands. // If we find a consecutive load, then we're done, otherwise, record all @@ -18610,24 +18695,6 @@ bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const { return TargetLowering::useLoadStackGuardNode(M); } -// Override to disable global variable loading on Linux and insert AIX canary -// word declaration. -void PPCTargetLowering::insertSSPDeclarations(Module &M) const { - if (Subtarget.isAIXABI()) { - M.getOrInsertGlobal(AIXSSPCanaryWordName, - PointerType::getUnqual(M.getContext())); - return; - } - if (!Subtarget.isTargetLinux()) - return TargetLowering::insertSSPDeclarations(M); -} - -Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const { - if (Subtarget.isAIXABI()) - return M.getGlobalVariable(AIXSSPCanaryWordName); - return TargetLowering::getSDagStackGuard(M); -} - bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { if (!VT.isSimple() || !Subtarget.hasVSX()) @@ -19492,12 +19559,10 @@ SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op, DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout())); bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false); TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; for (const SDValue &N : Op->op_values()) { EVT ArgVT = N.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = N; - Entry.Ty = ArgTy; + TargetLowering::ArgListEntry Entry(N, ArgTy); Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend); Entry.IsZExt = !Entry.IsSExt; Args.push_back(Entry); @@ -19856,3 +19921,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( return Builder.CreateOr( Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64"); } + +bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const { + return Subtarget.useCRBits(); +} diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 124c711..6694305 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -430,20 +430,6 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, - /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned - /// operand identifies the operating system entry point. - SC, - - /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer. - CLRBHRB, - - /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch - /// history rolling buffer entry. - MFBHRBE, - - /// CHAIN = RFEBB CHAIN, State - Return from event-based branch. - RFEBB, - /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little /// endian. Maps to an xxswapd instruction that corrects an lxvd2x /// or stxvd2x instruction. The chain is necessary because the @@ -512,6 +498,9 @@ namespace llvm { /// SETBCR - The ISA 3.1 (P10) SETBCR instruction. SETBCR, + /// VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction + VSRQ, + // NOTE: The nodes below may require PC-Rel specific patterns if the // address could be PC-Relative. When adding new nodes below, consider // whether or not the address can be PC-Relative and add the corresponding @@ -1155,8 +1144,6 @@ namespace llvm { /// Override to support customized stack guard loading. bool useLoadStackGuardNode(const Module &M) const override; - void insertSSPDeclarations(Module &M) const override; - Value *getSDagStackGuard(const Module &M) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; @@ -1207,6 +1194,8 @@ namespace llvm { bool IsVarArg) const; bool supportsTailCallFor(const CallBase *CB) const; + bool hasMultipleConditionRegisters(EVT VT) const override; + private: struct ReuseLoadInfo { SDValue Ptr; @@ -1359,6 +1348,8 @@ namespace llvm { SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const; + SDValue DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs, + const SDLoc &dl, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg, diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index fd20843..9359311 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1095,8 +1095,7 @@ let hasSideEffects = 0 in { defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$RA), (ins g8rc:$RAi, g8rc:$RS, u6imm:$SH, u6imm:$MBE), "rldimi", "$RA, $RS, $SH, $MBE", IIC_IntRotateDI, - []>, isPPC64, RegConstraint<"$RAi = $RA">, - NoEncode<"$RAi">; + []>, isPPC64, RegConstraint<"$RAi = $RA">; // Rotate instructions. defm RLDCL : MDSForm_1r<30, 8, @@ -1156,7 +1155,7 @@ defm RLWIMI8 : MForm_2r<20, (outs g8rc:$RA), (ins g8rc:$RAi, g8rc:$RS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwimi", "$RA, $RS, $SH, $MB, $ME", IIC_IntRotate, []>, PPC970_DGroup_Cracked, - RegConstraint<"$RAi = $RA">, NoEncode<"$RAi">; + RegConstraint<"$RAi = $RA">; let isSelect = 1 in def ISEL8 : AForm_4<31, 15, @@ -1313,21 +1312,18 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAU8 : DForm_1<43, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lhau $RST, $addr", IIC_LdStLHAU, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; // NO LWAU! let Interpretation64Bit = 1, isCodeGenOnly = 1 in def LHAUX8 : XForm_1_memOp<31, 375, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lhaux $RST, $addr", IIC_LdStLHAUX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; def LWAUX : XForm_1_memOp<31, 373, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lwaux $RST, $addr", IIC_LdStLHAUX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">, isPPC64; + []>, RegConstraint<"$addr.ptrreg = $ea_result">, isPPC64; } } @@ -1366,34 +1362,28 @@ let mayLoad = 1, hasSideEffects = 0 in { def LBZU8 : DForm_1<35, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lbzu $RST, $addr", IIC_LdStLoadUpd, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; def LHZU8 : DForm_1<41, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lhzu $RST, $addr", IIC_LdStLoadUpd, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; def LWZU8 : DForm_1<33, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lwzu $RST, $addr", IIC_LdStLoadUpd, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; def LBZUX8 : XForm_1_memOp<31, 119, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lbzux $RST, $addr", IIC_LdStLoadUpdX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; def LHZUX8 : XForm_1_memOp<31, 311, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lhzux $RST, $addr", IIC_LdStLoadUpdX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lwzux $RST, $addr", IIC_LdStLoadUpdX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; } } } // Interpretation64Bit @@ -1445,14 +1435,12 @@ let mayLoad = 1, hasSideEffects = 0 in { def LDU : DSForm_1<58, 1, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrix $D, $RA):$addr), "ldu $RST, $addr", IIC_LdStLDU, - []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64; def LDUX : XForm_1_memOp<31, 53, (outs g8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "ldux $RST, $addr", IIC_LdStLDUX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">, isPPC64; + []>, RegConstraint<"$addr.ptrreg = $ea_result">, isPPC64; } let mayLoad = 1, hasNoSchedulingInfo = 1 in { @@ -1718,45 +1706,41 @@ let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memri $D, $RA):$addr), "stbu $RST, $addr", IIC_LdStSTU, []>, - RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$addr.reg = $ea_res">; def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memri $D, $RA):$addr), "sthu $RST, $addr", IIC_LdStSTU, []>, - RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$addr.reg = $ea_res">; def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memri $D, $RA):$addr), "stwu $RST, $addr", IIC_LdStSTU, []>, - RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$addr.reg = $ea_res">; def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memrr $RA, $RB):$addr), "stbux $RST, $addr", IIC_LdStSTUX, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memrr $RA, $RB):$addr), "sthux $RST, $addr", IIC_LdStSTUX, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memrr $RA, $RB):$addr), "stwux $RST, $addr", IIC_LdStSTUX, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } // Interpretation64Bit def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memrix $D, $RA):$addr), "stdu $RST, $addr", IIC_LdStSTU, []>, - RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">, + RegConstraint<"$addr.reg = $ea_res">, isPPC64; def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memrr $RA, $RB):$addr), "stdux $RST, $addr", IIC_LdStSTUX, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td index 24287a9..97d5e28 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td @@ -261,6 +261,13 @@ def immEQOneV : PatLeaf<(build_vector), [{ return C->isOne(); return false; }]>; + +def VSRVSRO : PatFrag<(ops node:$input, node:$shift), + (int_ppc_altivec_vsr + (int_ppc_altivec_vsro node:$input, node:$shift), + node:$shift), + [{ return N->getOperand(1).hasOneUse(); }]>; + //===----------------------------------------------------------------------===// // Helpers for defining instructions that directly correspond to intrinsics. @@ -1471,13 +1478,13 @@ def VINSERTB : VXForm_1<781, (outs vrrc:$VD), "vinsertb $VD, $VB, $VA", IIC_VecGeneral, [(set v16i8:$VD, (PPCvecinsert v16i8:$VDi, v16i8:$VB, imm32SExt16:$VA))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VINSERTH : VXForm_1<845, (outs vrrc:$VD), (ins vrrc:$VDi, u4imm:$VA, vrrc:$VB), "vinserth $VD, $VB, $VA", IIC_VecGeneral, [(set v8i16:$VD, (PPCvecinsert v8i16:$VDi, v8i16:$VB, imm32SExt16:$VA))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>; def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>; @@ -1569,7 +1576,7 @@ def VRLWMI : VXForm_1<133, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VDi), [(set v4i32:$VD, (int_ppc_altivec_vrlwmi v4i32:$VA, v4i32:$VB, v4i32:$VDi))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm", [(set v2i64:$VD, (int_ppc_altivec_vrldnm v2i64:$VA, @@ -1579,7 +1586,7 @@ def VRLDMI : VXForm_1<197, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VDi), [(set v2i64:$VD, (int_ppc_altivec_vrldmi v2i64:$VA, v2i64:$VB, v2i64:$VDi))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; // Vector Shift Left/Right def VSLV : VX1_VT5_VA5_VB5<1860, "vslv", @@ -1630,9 +1637,11 @@ def BCDCTSQ_rec : VX_VT5_EO5_VB5_XO9_o <0, 385, "bcdctsq.", []>; // Decimal Copy-Sign/Set-Sign let Defs = [CR6] in -def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>; +def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", + [(set v16i8:$VD, (int_ppc_bcdcopysign v16i8:$VA, v16i8:$VB))]>; -def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>; +def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", + [(set v16i8:$VD, (int_ppc_bcdsetsign v16i8:$VB, i32:$PS))]>; // Decimal Shift/Unsigned-Shift/Shift-and-Round def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td index 2ced3fe..b4b475b4 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -13,7 +13,6 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<32> Inst; - field bits<32> SoftFail = 0; let Size = 4; bit PPC64 = 0; // Default value, override with isPPC64 @@ -95,7 +94,6 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<64> Inst; - field bits<64> SoftFail = 0; let Size = 8; bit PPC64 = 0; // Default value, override with isPPC64 diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td index 80fac18..7277e36 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -45,6 +45,81 @@ multiclass XOForm_RTAB5_L1r<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, } } +class VXForm_VRTB5<bits<11> xo, bits<5> R, dag OOL, dag IOL, string asmstr, + list<dag> pattern> : I<4, OOL, IOL, asmstr, NoItinerary> { + bits<5> VRT; + bits<5> VRB; + + let Pattern = pattern; + + let Inst{6 -10} = VRT; + let Inst{11 -15} = R; + let Inst{16 -20} = VRB; + let Inst{21 -31} = xo; +} + +class VXForm_VRTB5_UIM2<bits<11> xo, bits<3> R, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : I<4, OOL, IOL, asmstr, NoItinerary> { + bits<5> VRT; + bits<5> VRB; + bits<2> UIM; + + let Pattern = pattern; + + let Inst{6 -10} = VRT; + let Inst{11 -13} = R; + let Inst{14 -15} = UIM; + let Inst{16 -20} = VRB; + let Inst{21 -31} = xo; +} + +class VXForm_VRTB5_UIM1<bits<11> xo, bits<4> R, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : I<4, OOL, IOL, asmstr, NoItinerary> { + bits<5> VRT; + bits<5> VRB; + bits<1> UIM; + + let Pattern = pattern; + + let Inst{6 -10} = VRT; + let Inst{11 -14} = R; + let Inst{15} = UIM; + let Inst{16 -20} = VRB; + let Inst{21 -31} = xo; +} + +class VXForm_VRTB5_UIM3<bits<11> xo, bits<2> R, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : I<4, OOL, IOL, asmstr, NoItinerary> { + bits<5> VRT; + bits<5> VRB; + bits<3> UIM; + + let Pattern = pattern; + + let Inst{6 -10} = VRT; + let Inst{11 -12} = R; + let Inst{13 -15} = UIM; + let Inst{16 -20} = VRB; + let Inst{21 -31} = xo; +} + +class VXForm_VRTAB5<bits<11> xo, dag OOL, dag IOL, string asmstr, + list<dag> pattern> : I<4, OOL, IOL, asmstr, NoItinerary> { + bits<5> VRT; + bits<5> VRA; + bits<5> VRB; + + let Pattern = pattern; + + let Inst{6 -10} = VRT; + let Inst{11 -15} = VRA; + let Inst{16 -20} = VRB; + let Inst{21 -31} = xo; +} + let Predicates = [IsISAFuture] in { defm SUBFUS : XOForm_RTAB5_L1r<31, 72, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB, u1imm:$L), @@ -54,31 +129,65 @@ let Predicates = [IsISAFuture] in { let Predicates = [HasVSX, IsISAFuture] in { let mayLoad = 1 in { def LXVRL - : XX1Form_memOp<31, 525, (outs vsrc:$XT), (ins memr:$RA, g8rc:$RB), - "lxvrl $XT, $RA, $RB", IIC_LdStLoad, []>; + : XX1Form_memOp<31, 525, (outs vsrc:$XT), (ins (memr $RA):$addr, g8rc:$RB), + "lxvrl $XT, $addr, $RB", IIC_LdStLoad, []>; def LXVRLL - : XX1Form_memOp<31, 557, (outs vsrc:$XT), (ins memr:$RA, g8rc:$RB), - "lxvrll $XT, $RA, $RB", IIC_LdStLoad, []>; + : XX1Form_memOp<31, 557, (outs vsrc:$XT), (ins (memr $RA):$addr, g8rc:$RB), + "lxvrll $XT, $addr, $RB", IIC_LdStLoad, []>; def LXVPRL - : XForm_XTp5_XAB5<31, 589, (outs vsrprc:$XTp), (ins memr:$RA, g8rc:$RB), - "lxvprl $XTp, $RA, $RB", IIC_LdStLFD, []>; + : XForm_XTp5_XAB5<31, 589, (outs vsrprc:$XTp), (ins (memr $RA):$addr, g8rc:$RB), + "lxvprl $XTp, $addr, $RB", IIC_LdStLFD, []>; def LXVPRLL - : XForm_XTp5_XAB5<31, 621, (outs vsrprc:$XTp), (ins memr:$RA, g8rc:$RB), - "lxvprll $XTp, $RA, $RB", IIC_LdStLFD, []>; + : XForm_XTp5_XAB5<31, 621, (outs vsrprc:$XTp), (ins (memr $RA):$addr, g8rc:$RB), + "lxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; } let mayStore = 1 in { def STXVRL - : XX1Form_memOp<31, 653, (outs), (ins vsrc:$XT, memr:$RA, g8rc:$RB), - "stxvrl $XT, $RA, $RB", IIC_LdStLoad, []>; + : XX1Form_memOp<31, 653, (outs), (ins vsrc:$XT, (memr $RA):$addr, g8rc:$RB), + "stxvrl $XT, $addr, $RB", IIC_LdStLoad, []>; def STXVRLL - : XX1Form_memOp<31, 685, (outs), (ins vsrc:$XT, memr:$RA, g8rc:$RB), - "stxvrll $XT, $RA, $RB", IIC_LdStLoad, []>; + : XX1Form_memOp<31, 685, (outs), (ins vsrc:$XT, (memr $RA):$addr, g8rc:$RB), + "stxvrll $XT, $addr, $RB", IIC_LdStLoad, []>; def STXVPRL : XForm_XTp5_XAB5<31, 717, (outs), - (ins vsrprc:$XTp, memr:$RA, g8rc:$RB), - "stxvprl $XTp, $RA, $RB", IIC_LdStLFD, []>; + (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB), + "stxvprl $XTp, $addr, $RB", IIC_LdStLFD, []>; def STXVPRLL : XForm_XTp5_XAB5<31, 749, (outs), - (ins vsrprc:$XTp, memr:$RA, g8rc:$RB), - "stxvprll $XTp, $RA, $RB", IIC_LdStLFD, []>; + (ins vsrprc:$XTp, (memr $RA):$addr, g8rc:$RB), + "stxvprll $XTp, $addr, $RB", IIC_LdStLFD, []>; } + + def VUPKHSNTOB : VXForm_VRTB5<387, 0, (outs vrrc:$VRT), (ins vrrc:$VRB), + "vupkhsntob $VRT, $VRB", []>; + def VUPKLSNTOB : VXForm_VRTB5<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB), + "vupklsntob $VRT, $VRB", []>; + def VUPKINT4TOBF16 + : VXForm_VRTB5_UIM2<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), + "vupkint4tobf16 $VRT, $VRB, $UIM", []>; + def VUPKINT8TOBF16 + : VXForm_VRTB5_UIM1<387, 1, (outs vrrc:$VRT), (ins vrrc:$VRB, u1imm:$UIM), + "vupkint8tobf16 $VRT, $VRB, $UIM", []>; + def VUPKINT8TOFP32 + : VXForm_VRTB5_UIM2<387, 3, (outs vrrc:$VRT), (ins vrrc:$VRB, u2imm:$UIM), + "vupkint8tofp32 $VRT, $VRB, $UIM", []>; + def VUPKINT4TOFP32 + : VXForm_VRTB5_UIM3<387, 2, (outs vrrc:$VRT), (ins vrrc:$VRB, u3imm:$UIM), + "vupkint4tofp32 $VRT, $VRB, $UIM", []>; + + def VUCMPRHN : VXForm_VRTAB5<3, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), + "vucmprhn $VRT, $VRA, $VRB", []>; + def VUCMPRLN : VXForm_VRTAB5<67, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), + "vucmprln $VRT, $VRA, $VRB", []>; + def VUCMPRHB + : VXForm_VRTAB5<131, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), + "vucmprhb $VRT, $VRA, $VRB", []>; + def VUCMPRLB + : VXForm_VRTAB5<195, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), + "vucmprlb $VRT, $VRA, $VRB", []>; + def VUCMPRHH + : VXForm_VRTAB5<259, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), + "vucmprhh $VRT, $VRA, $VRB", []>; + def VUCMPRLH + : VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB), + "vucmprlh $VRT, $VRA, $VRB", []>; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td index ef8b27f..a3a75c0 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -8,12 +8,13 @@ //===----------------------------------------------------------------------===// // // This file describes the instructions introduced for the Future CPU for MMA. +// Please reference "PPCInstrVSX.td" for file structure. // //===----------------------------------------------------------------------===// class XX3Form_AT3_XABp5_P1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, NoItinerary> { + : I<opcode, OOL, IOL, asmstr, NoItinerary> { bits<3> AT; bits<5> XAp; bits<5> XBp; @@ -21,13 +22,13 @@ class XX3Form_AT3_XABp5_P1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, let Pattern = pattern; - let Inst{6-8} = AT{2-0}; - let Inst{9-10} = 0; - let Inst{11-14} = XAp{3-0}; + let Inst{6 -8} = AT{2 -0}; + let Inst{9 -10} = 0; + let Inst{11 -14} = XAp{3 -0}; let Inst{15} = P; - let Inst{16-19} = XBp{3-0}; + let Inst{16 -19} = XBp{3 -0}; let Inst{20} = 0; - let Inst{21-28} = xo; + let Inst{21 -28} = xo; let Inst{29} = XAp{4}; let Inst{30} = XBp{4}; let Inst{31} = 0; @@ -35,65 +36,64 @@ class XX3Form_AT3_XABp5_P1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, class XX2Form_AT3_XBp5_P2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, NoItinerary> { + : I<opcode, OOL, IOL, asmstr, NoItinerary> { bits<3> AT; bits<5> XBp; bits<2> P; let Pattern = pattern; - let Inst{6-8} = AT{2-0}; - let Inst{9-14} = 0; + let Inst{6 -8} = AT{2 -0}; + let Inst{9 -14} = 0; let Inst{15} = P{0}; - let Inst{16-19} = XBp{3-0}; + let Inst{16 -19} = XBp{3 -0}; let Inst{20} = P{1}; - let Inst{21-29} = xo; + let Inst{21 -29} = xo; let Inst{30} = XBp{4}; let Inst{31} = 0; } class XForm_ATB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL, string asmstr, list<dag> pattern> - : I <opcode, OOL, IOL, asmstr, NoItinerary> { + : I<opcode, OOL, IOL, asmstr, NoItinerary> { bits<3> AT; bits<3> AB; let Pattern = pattern; - let Inst{6-8} = AT{2-0}; - let Inst{9-10} = 0; - let Inst{11-15} = o; - let Inst{16-18} = AB{2-0}; - let Inst{19-20} = 0; - let Inst{21-30} = xo; + let Inst{6 -8} = AT{2 -0}; + let Inst{9 -10} = 0; + let Inst{11 -15} = o; + let Inst{16 -18} = AB{2 -0}; + let Inst{19 -20} = 0; + let Inst{21 -30} = xo; let Inst{31} = 0; } class XX3Form_AT3_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, itin> { + string asmstr, InstrItinClass itin, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { bits<3> AT; bits<5> XAp; bits<6> XB; let Pattern = pattern; - let Inst{6-8} = AT; - let Inst{9-10} = 0; - let Inst{11-14} = XAp{3-0}; + let Inst{6 -8} = AT; + let Inst{9 -10} = 0; + let Inst{11 -14} = XAp{3 -0}; let Inst{15} = 0; - let Inst{16-20} = XB{4-0}; - let Inst{21-28} = xo; - let Inst{29} = XAp{4}; - let Inst{30} = XB{5}; + let Inst{16 -20} = XB{4 -0}; + let Inst{21 -28} = xo; + let Inst{29} = XAp{4}; + let Inst{30} = XB{5}; let Inst{31} = 0; } class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { + : PI<1, opcode, OOL, IOL, asmstr, itin> { bits<3> AT; bits<5> XAp; bits<6> XB; @@ -104,29 +104,29 @@ class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, let Pattern = pattern; // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-15} = 0; - let Inst{16-19} = PMSK; - let Inst{20-27} = XMSK; - let Inst{28-31} = YMSK; + let Inst{6 -7} = 3; + let Inst{8 -11} = 9; + let Inst{12 -15} = 0; + let Inst{16 -19} = PMSK; + let Inst{20 -27} = XMSK; + let Inst{28 -31} = YMSK; // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-46} = XAp{3-0}; + let Inst{38 -40} = AT; + let Inst{41 -42} = 0; + let Inst{43 -46} = XAp{3 -0}; let Inst{47} = 0; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; + let Inst{48 -52} = XB{4 -0}; + let Inst{53 -60} = xo; let Inst{61} = XAp{4}; let Inst{62} = XB{5}; let Inst{63} = 0; } class MMIRR_XX3Form_X8Y4P2_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, - string asmstr, InstrItinClass itin, - list<dag> pattern> - : PI<1, opcode, OOL, IOL, asmstr, itin> { + string asmstr, InstrItinClass itin, + list<dag> pattern> + : PI<1, opcode, OOL, IOL, asmstr, itin> { bits<3> AT; bits<5> XAp; bits<6> XB; @@ -137,21 +137,21 @@ class MMIRR_XX3Form_X8Y4P2_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, let Pattern = pattern; // The prefix. - let Inst{6-7} = 3; - let Inst{8-11} = 9; - let Inst{12-15} = 0; - let Inst{16-17} = PMSK; - let Inst{18-19} = 0; - let Inst{20-27} = XMSK; - let Inst{28-31} = YMSK; + let Inst{6 -7} = 3; + let Inst{8 -11} = 9; + let Inst{12 -15} = 0; + let Inst{16 -17} = PMSK; + let Inst{18 -19} = 0; + let Inst{20 -27} = XMSK; + let Inst{28 -31} = YMSK; // The instruction. - let Inst{38-40} = AT; - let Inst{41-42} = 0; - let Inst{43-46} = XAp{3-0}; + let Inst{38 -40} = AT; + let Inst{41 -42} = 0; + let Inst{43 -46} = XAp{3 -0}; let Inst{47} = 0; - let Inst{48-52} = XB{4-0}; - let Inst{53-60} = xo; + let Inst{48 -52} = XB{4 -0}; + let Inst{53 -60} = xo; let Inst{61} = XAp{4}; let Inst{62} = XB{5}; let Inst{63} = 0; @@ -160,14 +160,15 @@ class MMIRR_XX3Form_X8Y4P2_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, multiclass DMR_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, string asmstr> { let Predicates = [MMA, IsISAFuture] in { - def NAME : - XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x01), (outs dmr:$AT), IOL, - !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PP : - XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NAME + : XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x01), (outs dmr:$AT), IOL, + !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PP + : XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } } @@ -175,202 +176,217 @@ multiclass DMR_UM_M448_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, string asmstr> { defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { - def PM#NAME : - MMIRR_XX3Form_X8YP4_XAp5B6< - opcode, !or(xo, 0x01), (outs dmr:$AT), - !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_X8YP4_XAp5B6< - opcode, xo, (outs dmr:$AT), - !con((ins dmr:$ATi), - !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME + : MMIRR_XX3Form_X8YP4_XAp5B6< + opcode, !or(xo, 0x01), (outs dmr:$AT), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP + : MMIRR_XX3Form_X8YP4_XAp5B6< + opcode, xo, (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } } multiclass DMR_BF16_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { + string asmstr> { let Predicates = [MMA, IsISAFuture] in { - def NAME : - XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x11), (outs dmr:$AT), IOL, - !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PP : - XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NAME + : XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x11), (outs dmr:$AT), IOL, + !strconcat(asmbase#" ", asmstr), IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PP + : XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } } -multiclass DMR_BF16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { +multiclass DMR_BF16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { defm NAME : DMR_BF16_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { - def PM#NAME : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !or(xo, 0x11), (outs dmr:$AT), - !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, xo, (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !or(xo, 0x11), (outs dmr:$AT), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, xo, (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } } -multiclass DMR_F16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { +multiclass DMR_F16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>; let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { - def PM#NAME : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !or(xo, 0x01), (outs dmr:$AT), - !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, xo, (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !or(xo, 0x01), (outs dmr:$AT), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, xo, (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } } multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { + string asmbase, string asmstr> { defm NAME : DMR_BF16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>; let Predicates = [MMA, IsISAFuture] in { - def PN : XX3Form_AT3_XAp5B6< - opcode, !xor(xo, 0xF9), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAp5B6< - opcode, !xor(xo, 0x39), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAp5B6< - opcode, !xor(xo, 0xA0), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PN + : XX3Form_AT3_XAp5B6<opcode, !xor(xo, 0xF9), (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def NP + : XX3Form_AT3_XAp5B6<opcode, !xor(xo, 0x39), (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def NN + : XX3Form_AT3_XAp5B6<opcode, !xor(xo, 0xA0), (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { - def PM#NAME#PN : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !xor(xo, 0xF9), (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !xor(xo, 0x39), (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !xor(xo, 0xA0), (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0xF9), (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def PM#NAME#NP + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0x39), (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def PM#NAME#NN + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0xA0), (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } } multiclass DMR_NEG_UM_M284_XOXORd11188<bits<6> opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { + string asmbase, string asmstr> { defm NAME : DMR_F16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>; let Predicates = [MMA, IsISAFuture] in { - def PN : XX3Form_AT3_XAp5B6< - opcode, !xor(xo, 0xD1), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAp5B6< - opcode, !xor(xo, 0x11), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAp5B6< - opcode, !xor(xo, 0x88), (outs dmr:$AT), !con((ins dmr:$ATi), IOL), - !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PN + : XX3Form_AT3_XAp5B6<opcode, !xor(xo, 0xD1), (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def NP + : XX3Form_AT3_XAp5B6<opcode, !xor(xo, 0x11), (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def NN + : XX3Form_AT3_XAp5B6<opcode, !xor(xo, 0x88), (outs dmr:$AT), + !con((ins dmr:$ATi), IOL), + !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { - def PM#NAME#PN : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !xor(xo, 0xD1), (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !xor(xo, 0x11), (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_X8Y4P2_XAp5B6< - opcode, !xor(xo, 0x88), (outs dmr:$AT), - !con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0xD1), (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def PM#NAME#NP + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0x11), (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; + def PM#NAME#NN + : MMIRR_XX3Form_X8Y4P2_XAp5B6< + opcode, !xor(xo, 0x88), (outs dmr:$AT), + !con((ins dmr:$ATi), + !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">; } } class XForm_AT3_T1_AB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL, string asmstr, list<dag> pattern> - : I <opcode, OOL, IOL, asmstr, NoItinerary> { + : I<opcode, OOL, IOL, asmstr, NoItinerary> { bits<3> AT; bits<3> AB; bits<1> T; let Pattern = pattern; - let Inst{6-8} = AT{2-0}; + let Inst{6 -8} = AT{2 -0}; let Inst{9} = 0; let Inst{10} = T; - let Inst{11-15} = o; - let Inst{16-18} = AB{2-0}; - let Inst{19-20} = 0; - let Inst{21-30} = xo; + let Inst{11 -15} = o; + let Inst{16 -18} = AB{2 -0}; + let Inst{19 -20} = 0; + let Inst{21 -30} = xo; let Inst{31} = 0; } class XForm_ATp2_SR5<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL, string asmstr, list<dag> pattern> - : I <opcode, OOL, IOL, asmstr, NoItinerary> { + : I<opcode, OOL, IOL, asmstr, NoItinerary> { bits<2> ATp; bits<5> SR; let Pattern = pattern; - let Inst{6-7} = ATp{1-0}; - let Inst{8-10} = 0; - let Inst{11-15} = o; - let Inst{16-20} = SR{4-0}; - let Inst{21-30} = xo; + let Inst{6 -7} = ATp{1 -0}; + let Inst{8 -10} = 0; + let Inst{11 -15} = o; + let Inst{16 -20} = SR{4 -0}; + let Inst{21 -30} = xo; let Inst{31} = 0; } class XX2Form_AT3_XB6_ID2_E1_BL2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, - string asmstr, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, NoItinerary> { + string asmstr, list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, NoItinerary> { bits<3> AT; bits<6> XB; bits<2> ID; @@ -379,41 +395,48 @@ class XX2Form_AT3_XB6_ID2_E1_BL2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, let Pattern = pattern; - let Inst{6-8} = AT{2-0}; - let Inst{9-10} = 0; - let Inst{11-12} = ID{1-0}; + let Inst{6 -8} = AT{2 -0}; + let Inst{9 -10} = 0; + let Inst{11 -12} = ID{1 -0}; let Inst{13} = E; - let Inst{14-15} = BL{1-0}; - let Inst{16-20} = XB{4-0}; - let Inst{21-29} = xo; + let Inst{14 -15} = BL{1 -0}; + let Inst{16 -20} = XB{4 -0}; + let Inst{21 -29} = xo; let Inst{30} = XB{5}; let Inst{31} = 0; } -let Predicates = [IsISAFuture] in { - def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226, - (outs vsrprc:$XAp, vsrprc:$XBp), - (ins wacc:$AT), - "dmxxextfdmr512 $XAp, $XBp, $AT, 0", []> { +//-------------------------- Instruction definitions -------------------------// +// Predicate combinations available: +// [MMA, IsISAFuture] +// [MMA, PrefixInstrs, IsISAFuture] + +let Predicates = [MMA, IsISAFuture] in { + def DMXXEXTFDMR512 + : XX3Form_AT3_XABp5_P1<60, 226, (outs vsrprc:$XAp, vsrprc:$XBp), + (ins wacc:$AT), + "dmxxextfdmr512 $XAp, $XBp, $AT, 0", []> { let P = 0; } - def DMXXEXTFDMR512_HI : XX3Form_AT3_XABp5_P1<60, 226, - (outs vsrprc:$XAp, vsrprc:$XBp), - (ins wacc_hi:$AT), - "dmxxextfdmr512 $XAp, $XBp, $AT, 1", []> { + def DMXXEXTFDMR512_HI + : XX3Form_AT3_XABp5_P1<60, 226, (outs vsrprc:$XAp, vsrprc:$XBp), + (ins wacc_hi:$AT), + "dmxxextfdmr512 $XAp, $XBp, $AT, 1", []> { let P = 1; } - def DMXXINSTDMR512 : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc:$AT), - (ins vsrprc:$XAp, vsrprc:$XBp), - "dmxxinstdmr512 $AT, $XAp, $XBp, 0", []> { + def DMXXINSTDMR512 + : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc:$AT), + (ins vsrprc:$XAp, vsrprc:$XBp), + "dmxxinstdmr512 $AT, $XAp, $XBp, 0", []> { let P = 0; } - def DMXXINSTDMR512_HI : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc_hi:$AT), - (ins vsrprc:$XAp, vsrprc:$XBp), - "dmxxinstdmr512 $AT, $XAp, $XBp, 1", []> { + def DMXXINSTDMR512_HI + : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc_hi:$AT), + (ins vsrprc:$XAp, vsrprc:$XBp), + "dmxxinstdmr512 $AT, $XAp, $XBp, 1", []> { let P = 1; } @@ -422,236 +445,220 @@ let Predicates = [IsISAFuture] in { "dmxxextfdmr256 $XBp, $AT, $P", []>; def DMXXINSTDMR256 : XX2Form_AT3_XBp5_P2<60, 485, (outs dmrrowp:$AT), - (ins vsrprc:$XBp, u2imm:$P), - "dmxxinstdmr256 $AT, $XBp, $P", []>; + (ins vsrprc:$XBp, u2imm:$P), + "dmxxinstdmr256 $AT, $XBp, $P", []>; - def DMMR : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB), - "dmmr $AT, $AB", - [(set v1024i1:$AT, (int_ppc_mma_dmmr v1024i1:$AB))]>; + def DMMR + : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB), "dmmr $AT, $AB", + [(set v1024i1:$AT, (int_ppc_mma_dmmr v1024i1:$AB))]>; def DMXOR : XForm_ATB3<31, 7, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB), "dmxor $AT, $AB", - [(set v1024i1:$AT, (int_ppc_mma_dmxor v1024i1:$ATi, v1024i1:$AB))]>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - - def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins), - "dmsetdmrz $AT", NoItinerary, - [(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>; -} - -// MMA+ accumulating/non-accumulating instructions. - -// DMXVI8GERX4, DMXVI8GERX4PP, PMDMXVI8GERX4, PMDMXVI8GERX4PP -defm DMXVI8GERX4 : DMR_UM_M448_XOEO<59, 10, (ins vsrprc:$XAp, vsrc:$XB), - "dmxvi8gerx4", "$AT, $XAp, $XB">; - -let Predicates = [MMA, IsISAFuture] in { - def DMXVI8GERX4SPP : - XX3Form_AT3_XAp5B6<59, 98, (outs dmr:$AT), (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB), - "dmxvi8gerx4spp $AT, $XAp, $XB", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; -} + [(set v1024i1:$AT, (int_ppc_mma_dmxor v1024i1:$ATi, + v1024i1:$AB))]>, + RegConstraint<"$ATi = $AT">; + + def DMSETDMRZ + : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins), "dmsetdmrz $AT", + NoItinerary, [(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>; + + // DMXVI8GERX4, DMXVI8GERX4PP, PMDMXVI8GERX4, PMDMXVI8GERX4PP + defm DMXVI8GERX4 : DMR_UM_M448_XOEO<59, 10, (ins vsrprc:$XAp, vsrc:$XB), + "dmxvi8gerx4", "$AT, $XAp, $XB">; + + // DMXVBF16GERX2, DMXVBF16GERX2PP, DMXVBF16GERX2PN, dMXVBF16GERX2NP, + // DMXVBF16GERX2NN PMDMXVBF16GERX2, PMDMXVBF16GERX2PP, PMDMXVBF16GERX2PN, + // PMDMXVBF16GERX2NP, PMDMXVBF16GERX2NN + defm DMXVBF16GERX2 + : DMR_NEG_UM_M284_XOXORf939a0<59, 74, (ins vsrprc:$XAp, vsrc:$XB), + "dmxvbf16gerx2", "$AT, $XAp, $XB">; + + // DMXVF16GERX2, DMXVF16GERX2PP, DMXVF16GERX2PN, dMXVF16GERX2NP, + // DMXVF16GERX2NN PMDMXVF16GERX2, PMDMXVF16GERX2PP, PMDMXVF16GERX2PN, + // PMDMXVF16GERX2NP, PMDMXVF16GERX2NN + defm DMXVF16GERX2 + : DMR_NEG_UM_M284_XOXORd11188<59, 66, (ins vsrprc:$XAp, vsrc:$XB), + "dmxvf16gerx2", "$AT, $XAp, $XB">; + + // DMF cryptography [support] Instructions + def DMSHA2HASH + : XForm_AT3_T1_AB3< + 31, 14, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB, u1imm:$T), + "dmsha2hash $AT, $AB, $T", + [(set v1024i1:$AT, (int_ppc_mma_dmsha2hash v1024i1:$ATi, + v1024i1:$AB, timm:$T))]>, + RegConstraint<"$ATi = $AT">; + def DMSHA3HASH + : XForm_ATp2_SR5<31, 15, 177, (outs dmrp:$ATp), + (ins dmrp:$ATpi, u5imm:$SR), "dmsha3hash $ATp, $SR", + [(set v2048i1:$ATp, + (int_ppc_mma_dmsha3hash v2048i1:$ATpi, timm:$SR))]>, + RegConstraint<"$ATpi = $ATp">; + def DMXXSHAPAD + : XX2Form_AT3_XB6_ID2_E1_BL2<60, 421, (outs dmr:$AT), + (ins dmr:$ATi, vsrc:$XB, u2imm:$ID, u1imm:$E, + u2imm:$BL), + "dmxxshapad $AT, $XB, $ID, $E, $BL", []>, + RegConstraint<"$ATi = $AT">; + + // MMA+ accumulating/non-accumulating instructions. + def DMXVI8GERX4SPP + : XX3Form_AT3_XAp5B6<59, 98, (outs dmr:$AT), + (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB), + "dmxvi8gerx4spp $AT, $XAp, $XB", IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">; + +} // End of [MMA, IsISAFuture] let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { - def PMDMXVI8GERX4SPP : - MMIRR_XX3Form_X8YP4_XAp5B6<59, 98, (outs dmr:$AT), - (ins dmr:$ATi, vsrprc:$XAp,vsrc:$XB, u8imm:$XMSK, - u4imm:$YMSK, u4imm:$PMSK), - "pmdmxvi8gerx4spp $AT, $XAp, $XB, $XMSK, $YMSK, $PMSK", - IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PMDMXVI8GERX4SPP + : MMIRR_XX3Form_X8YP4_XAp5B6< + 59, 98, (outs dmr:$AT), + (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB, u8imm:$XMSK, u4imm:$YMSK, + u4imm:$PMSK), + "pmdmxvi8gerx4spp $AT, $XAp, $XB, $XMSK, $YMSK, $PMSK", + IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">; } -// DMXVBF16GERX2, DMXVBF16GERX2PP, DMXVBF16GERX2PN, dMXVBF16GERX2NP, DMXVBF16GERX2NN -// PMDMXVBF16GERX2, PMDMXVBF16GERX2PP, PMDMXVBF16GERX2PN, PMDMXVBF16GERX2NP, PMDMXVBF16GERX2NN -defm DMXVBF16GERX2 : DMR_NEG_UM_M284_XOXORf939a0<59, 74, (ins vsrprc:$XAp, vsrc:$XB), - "dmxvbf16gerx2", "$AT, $XAp, $XB">; - -// DMXVF16GERX2, DMXVF16GERX2PP, DMXVF16GERX2PN, dMXVF16GERX2NP, DMXVF16GERX2NN -// PMDMXVF16GERX2, PMDMXVF16GERX2PP, PMDMXVF16GERX2PN, PMDMXVF16GERX2NP, PMDMXVF16GERX2NN -defm DMXVF16GERX2 : DMR_NEG_UM_M284_XOXORd11188<59, 66, (ins vsrprc:$XAp, vsrc:$XB), - "dmxvf16gerx2", "$AT, $XAp, $XB">; - -// DMF cryptography [support] Instructions -let Predicates = [IsISAFuture] in { - def DMSHA2HASH : - XForm_AT3_T1_AB3<31, 14, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB, u1imm:$T), - "dmsha2hash $AT, $AB, $T", - [(set v1024i1:$AT, (int_ppc_mma_dmsha2hash v1024i1:$ATi, v1024i1:$AB, timm:$T))]>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - - def DMSHA3HASH : - XForm_ATp2_SR5<31, 15, 177, (outs dmrp:$ATp), (ins dmrp:$ATpi , u5imm:$SR), - "dmsha3hash $ATp, $SR", - [(set v2048i1:$ATp, (int_ppc_mma_dmsha3hash v2048i1:$ATpi, timm:$SR))]>, - RegConstraint<"$ATpi = $ATp">, NoEncode<"$ATpi">; - - def DMXXSHAPAD : - XX2Form_AT3_XB6_ID2_E1_BL2<60, 421, (outs dmr:$AT), - (ins dmr:$ATi, vsrc:$XB, u2imm:$ID, u1imm:$E, u2imm:$BL), - "dmxxshapad $AT, $XB, $ID, $E, $BL", []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; -} +//---------------------------- Anonymous Patterns ----------------------------// +// Predicate combinations available: +// [MMA, IsISAFuture] +// [MMA, PrefixInstrs, IsISAFuture] -// MMA+ Intrinsics let Predicates = [MMA, IsISAFuture] in { + // MMA+ Intrinsics def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)), (DMXVI8GERX4 $XAp, RCCp.BToVSRC)>; - def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC)>; - def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2 v256i1:$XAp, v16i8:$XB)), (DMXVBF16GERX2 $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2pp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVBF16GERX2PP $ATi, $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2pn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVBF16GERX2PN $ATi, $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2np v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVBF16GERX2NP $ATi, $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>; - def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2 v256i1:$XAp, v16i8:$XB)), (DMXVF16GERX2 $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC)>; - - def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)), + def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB)), (DMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>; + + // Cryptography Intrinsic + def : Pat<(v1024i1 (int_ppc_mma_dmxxshapad v1024i1:$ATi, v16i8:$XB, timm:$ID, + timm:$E, timm:$BL)), + (DMXXSHAPAD $ATi, RCCp.BToVSRC, $ID, $E, $BL)>; } let Predicates = [MMA, PrefixInstrs, IsISAFuture] in { - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)), - (PMDMXVI8GERX4 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk4Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4 v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk4Imm:$PMSK)), + (PMDMXVI8GERX4 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk4Imm:$PMSK)>; + + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk4Imm:$PMSK)), (PMDMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk4Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk4Imm:$PMSK)), (PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMDMXVBF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2 v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMDMXVBF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2pp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVBF16GERX2PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2pn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVBF16GERX2PN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2np v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVBF16GERX2NP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMDMXVF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2 v256i1:$XAp, v16i8:$XB, + Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMDMXVF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB, - Msk8Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), + def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, + v16i8:$XB, Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK)), (PMDMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; -} - -// Cryptography Intrinsic -let Predicates = [IsISAFuture] in { - def : Pat<(v1024i1 (int_ppc_mma_dmxxshapad v1024i1:$ATi, v16i8:$XB, timm:$ID, - timm:$E, timm:$BL)), (DMXXSHAPAD $ATi, RCCp.BToVSRC, $ID, $E, $BL)>; + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; } -// MMA+ Instruction aliases -let Predicates = [IsISAFuture] in { - def : InstAlias<"dmsha256hash $AT, $AB", - (DMSHA2HASH dmr:$AT, dmr:$AB, 0)>; +//---------------------------- Instruction aliases ---------------------------// - def : InstAlias<"dmsha512hash $AT, $AB", - (DMSHA2HASH dmr:$AT, dmr:$AB, 1)>; - - def : InstAlias<"dmsha3dw $ATp", - (DMSHA3HASH dmrp:$ATp, 0)>; - - def : InstAlias<"dmcryshash $ATp", - (DMSHA3HASH dmrp:$ATp, 12)>; - - def : InstAlias<"dmxxsha3512pad $AT, $XB, $E", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 0)>; - - def : InstAlias<"dmxxsha3384pad $AT, $XB, $E", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 1)>; - - def : InstAlias<"dmxxsha3256pad $AT, $XB, $E", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 2)>; - - def : InstAlias<"dmxxsha3224pad $AT, $XB, $E", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 0, u1imm:$E, 3)>; - - def : InstAlias<"dmxxshake256pad $AT, $XB, $E", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 1, u1imm:$E, 0)>; - - def : InstAlias<"dmxxshake128pad $AT, $XB, $E", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 1, u1imm:$E, 1)>; - - def : InstAlias<"dmxxsha384512pad $AT, $XB", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 2, 0, 0)>; - - def : InstAlias<"dmxxsha224256pad $AT, $XB", - (DMXXSHAPAD dmr:$AT, vsrc:$XB, 3, 0, 0)>; +let Predicates = [MMA, IsISAFuture] in { + def : InstAlias<"dmsha256hash $AT, $AB", (DMSHA2HASH dmr:$AT, dmr:$AB, 0)>; + def : InstAlias<"dmsha512hash $AT, $AB", (DMSHA2HASH dmr:$AT, dmr:$AB, 1)>; + def : InstAlias<"dmsha3dw $ATp", (DMSHA3HASH dmrp:$ATp, 0)>; + def : InstAlias<"dmcryshash $ATp", (DMSHA3HASH dmrp:$ATp, 12)>; + def : InstAlias<"dmxxsha3512pad $AT, $XB, $E", (DMXXSHAPAD dmr:$AT, vsrc:$XB, + 0, u1imm:$E, 0)>; + def : InstAlias<"dmxxsha3384pad $AT, $XB, $E", (DMXXSHAPAD dmr:$AT, vsrc:$XB, + 0, u1imm:$E, 1)>; + def : InstAlias<"dmxxsha3256pad $AT, $XB, $E", (DMXXSHAPAD dmr:$AT, vsrc:$XB, + 0, u1imm:$E, 2)>; + def : InstAlias<"dmxxsha3224pad $AT, $XB, $E", (DMXXSHAPAD dmr:$AT, vsrc:$XB, + 0, u1imm:$E, 3)>; + def : InstAlias<"dmxxshake256pad $AT, $XB, $E", (DMXXSHAPAD dmr:$AT, vsrc:$XB, + 1, u1imm:$E, 0)>; + def : InstAlias<"dmxxshake128pad $AT, $XB, $E", (DMXXSHAPAD dmr:$AT, vsrc:$XB, + 1, u1imm:$E, 1)>; + def : InstAlias<"dmxxsha384512pad $AT, $XB", (DMXXSHAPAD dmr:$AT, vsrc:$XB, 2, + 0, 0)>; + def : InstAlias<"dmxxsha224256pad $AT, $XB", (DMXXSHAPAD dmr:$AT, vsrc:$XB, 3, + 0, 0)>; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 7c1550e9..7cb7e05 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" @@ -1863,6 +1864,48 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcRegSub1) .addReg(SrcRegSub1, getKillRegState(KillSrc)); return; + } else if ((PPC::WACCRCRegClass.contains(DestReg) || + PPC::WACC_HIRCRegClass.contains(DestReg)) && + (PPC::WACCRCRegClass.contains(SrcReg) || + PPC::WACC_HIRCRegClass.contains(SrcReg))) { + + Opc = PPC::WACCRCRegClass.contains(SrcReg) ? PPC::DMXXEXTFDMR512 + : PPC::DMXXEXTFDMR512_HI; + + RegScavenger RS; + RS.enterBasicBlockEnd(MBB); + RS.backward(std::next(I)); + + Register TmpReg1 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I, + /* RestoreAfter */ false, 0, + /* AllowSpill */ false); + + RS.setRegUsed(TmpReg1); + Register TmpReg2 = RS.scavengeRegisterBackwards(PPC::VSRpRCRegClass, I, + /* RestoreAfter */ false, 0, + /* AllowSpill */ false); + + BuildMI(MBB, I, DL, get(Opc)) + .addReg(TmpReg1, RegState::Define) + .addReg(TmpReg2, RegState::Define) + .addReg(SrcReg, getKillRegState(KillSrc)); + + Opc = PPC::WACCRCRegClass.contains(DestReg) ? PPC::DMXXINSTDMR512 + : PPC::DMXXINSTDMR512_HI; + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(TmpReg1, RegState::Kill) + .addReg(TmpReg2, RegState::Kill); + + return; + } else if (PPC::DMRRCRegClass.contains(DestReg) && + PPC::DMRRCRegClass.contains(SrcReg)) { + + BuildMI(MBB, I, DL, get(PPC::DMMR), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + + return; + } else llvm_unreachable("Impossible reg-to-reg copy"); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 99ef89a..7cea9a1 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -58,6 +58,10 @@ def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; +def SDT_PPCVecShiftQuad : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2> +]>; + def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; @@ -157,6 +161,8 @@ def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; +def PPCvsrq: SDNode<"PPCISD::VSRQ", SDT_PPCVecShiftQuad, []>; + def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", SDTFPUnaryOp, [SDNPHasChain]>; def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", @@ -365,16 +371,6 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; -def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>; -def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc, - [SDNPHasChain, SDNPSideEffect]>; - -def PPCclrbhrb : SDNode<"PPCISD::CLRBHRB", SDTNone, - [SDNPHasChain, SDNPSideEffect]>; -def PPCmfbhrbe : SDNode<"PPCISD::MFBHRBE", SDTIntBinOp, [SDNPHasChain]>; -def PPCrfebb : SDNode<"PPCISD::RFEBB", SDT_PPCsc, - [SDNPHasChain, SDNPSideEffect]>; - def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -675,9 +671,6 @@ class isRecordForm { bit RC = 1; } class RegConstraint<string C> { string Constraints = C; } -class NoEncode<string E> { - string DisableEncoding = E; -} // Define PowerPC specific addressing mode. @@ -1673,7 +1666,7 @@ let isBranch = 1, isTerminator = 1, Size = 0 in { // System call. let PPC970_Unit = 7 in { def SC : SCForm<17, 1, 0, (outs), (ins i32imm:$LEV), - "sc $LEV", IIC_BrB, [(PPCsc (i32 imm:$LEV))]>; + "sc $LEV", IIC_BrB, []>; } // We mark SCV as having no scheduling model since it is only meant to be used @@ -1685,21 +1678,14 @@ let Predicates = [IsISA3_0], hasNoSchedulingInfo = 1 in { } // Branch history rolling buffer. -def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB, - [(PPCclrbhrb)]>, +def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB, []>, PPC970_DGroup_Single; -// The $dmy argument used for MFBHRBE is not needed; however, including -// it avoids automatic generation of PPCFastISel::fastEmit_i(), which -// interferes with necessary special handling (see PPCFastISel.cpp). -def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$RT), - (ins u10imm:$imm, u10imm:$dmy), - "mfbhrbe $RT, $imm", IIC_BrB, - [(set i32:$RT, - (PPCmfbhrbe imm:$imm, imm:$dmy))]>, + +def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$RT), (ins u10imm:$imm), + "mfbhrbe $RT, $imm", IIC_BrB, []>, PPC970_DGroup_First; -def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$S), "rfebb $S", - IIC_BrB, [(PPCrfebb (i32 imm:$S))]>, +def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$S), "rfebb $S", IIC_BrB, []>, PPC970_DGroup_Single; def : InstAlias<"rfebb", (RFEBB 1)>; @@ -2006,29 +1992,24 @@ def LBZU : DForm_1<35, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, def LHAU : DForm_1<43, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lhau $RST, $addr", IIC_LdStLHAU, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; def LHZU : DForm_1<41, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lhzu $RST, $addr", IIC_LdStLoadUpd, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; def LWZU : DForm_1<33, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lwzu $RST, $addr", IIC_LdStLoadUpd, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; let Predicates = [HasFPU] in { def LFSU : DForm_1<49, (outs f4rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lfsu $RST, $addr", IIC_LdStLFDU, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; def LFDU : DForm_1<51, (outs f8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr), "lfdu $RST, $addr", IIC_LdStLFDU, - []>, RegConstraint<"$addr.reg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.reg = $ea_result">; } @@ -2036,39 +2017,33 @@ def LFDU : DForm_1<51, (outs f8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, def LBZUX : XForm_1_memOp<31, 119, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lbzux $RST, $addr", IIC_LdStLoadUpdX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; def LHAUX : XForm_1_memOp<31, 375, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lhaux $RST, $addr", IIC_LdStLHAUX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; def LHZUX : XForm_1_memOp<31, 311, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lhzux $RST, $addr", IIC_LdStLoadUpdX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; def LWZUX : XForm_1_memOp<31, 55, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lwzux $RST, $addr", IIC_LdStLoadUpdX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; let Predicates = [HasFPU] in { def LFSUX : XForm_1_memOp<31, 567, (outs f4rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lfsux $RST, $addr", IIC_LdStLFDUX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memrr $RA, $RB):$addr), "lfdux $RST, $addr", IIC_LdStLFDUX, - []>, RegConstraint<"$addr.ptrreg = $ea_result">, - NoEncode<"$ea_result">; + []>, RegConstraint<"$addr.ptrreg = $ea_result">; } } } @@ -2149,20 +2124,20 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$RST, (memri $D, $RA):$dst), let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memri $D, $RA):$dst), "stbu $RST, $dst", IIC_LdStSTU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$dst.reg = $ea_res">; def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memri $D, $RA):$dst), "sthu $RST, $dst", IIC_LdStSTU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$dst.reg = $ea_res">; def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memri $D, $RA):$dst), "stwu $RST, $dst", IIC_LdStSTU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$dst.reg = $ea_res">; let Predicates = [HasFPU] in { def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$RST, (memri $D, $RA):$dst), "stfsu $RST, $dst", IIC_LdStSTFDU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$dst.reg = $ea_res">; def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$RST, (memri $D, $RA):$dst), "stfdu $RST, $dst", IIC_LdStSTFDU, []>, - RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + RegConstraint<"$dst.reg = $ea_res">; } } @@ -2224,32 +2199,27 @@ def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memrr $RA, $RB):$addr), "stbux $RST, $addr", IIC_LdStSTUX, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memrr $RA, $RB):$addr), "sthux $RST, $addr", IIC_LdStSTUX, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memrr $RA, $RB):$addr), "stwux $RST, $addr", IIC_LdStSTUX, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; let Predicates = [HasFPU] in { def STFSUX: XForm_8_memOp<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$RST, (memrr $RA, $RB):$addr), "stfsux $RST, $addr", IIC_LdStSTFDU, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; def STFDUX: XForm_8_memOp<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$RST, (memrr $RA, $RB):$addr), "stfdux $RST, $addr", IIC_LdStSTFDU, []>, RegConstraint<"$addr.ptrreg = $ea_res">, - NoEncode<"$ea_res">, PPC970_DGroup_Cracked; } } @@ -3116,7 +3086,7 @@ defm RLWIMI : MForm_2r<20, (outs gprc:$RA), (ins gprc:$RAi, gprc:$RS, u5imm:$SH, u5imm:$MB, u5imm:$ME), "rlwimi", "$RA, $RS, $SH, $MB, $ME", IIC_IntRotate, []>, PPC970_DGroup_Cracked, - RegConstraint<"$RAi = $RA">, NoEncode<"$RAi">; + RegConstraint<"$RAi = $RA">; } let BaseName = "rlwinm" in { def RLWINM : MForm_2<21, @@ -3252,9 +3222,10 @@ def PPC32GOT: PPCEmitTimePseudo<(outs gprc:$rD), (ins), "#PPC32GOT", // Get the _GLOBAL_OFFSET_TABLE_ in PIC mode. // This uses two output registers, the first as the real output, the second as a -// temporary register, used internally in code generation. +// temporary register, used internally in code generation. A "bl" also clobbers LR. +let Defs = [LR] in def PPC32PICGOT: PPCEmitTimePseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT", - []>, NoEncode<"$rT">; + []>; def LDgotTprelL32: PPCEmitTimePseudo<(outs gprc_nor0:$rD), (ins s16imm:$disp, gprc_nor0:$reg), "#LDgotTprelL32", @@ -4984,44 +4955,44 @@ defm : BranchSimpleMnemonic1<"dzf", "", 2>; multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> { def : InstAlias<"b"#name#pm#" $cc, $dst", - (BCC bibo, crrc:$cc, condbrtarget:$dst)>; + (BCC (pred bibo, crrc:$cc), condbrtarget:$dst)>; def : InstAlias<"b"#name#pm#" $dst", - (BCC bibo, CR0, condbrtarget:$dst)>; + (BCC (pred bibo, CR0), condbrtarget:$dst)>; def : InstAlias<"b"#name#"a"#pm#" $cc, $dst", - (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>; + (BCCA (pred bibo, crrc:$cc), abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"a"#pm#" $dst", - (BCCA bibo, CR0, abscondbrtarget:$dst)>; + (BCCA (pred bibo, CR0), abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lr"#pm#" $cc", - (BCCLR bibo, crrc:$cc)>; + (BCCLR (pred bibo, crrc:$cc))>; def : InstAlias<"b"#name#"lr"#pm, - (BCCLR bibo, CR0)>; + (BCCLR (pred bibo, CR0))>; def : InstAlias<"b"#name#"ctr"#pm#" $cc", - (BCCCTR bibo, crrc:$cc)>; + (BCCCTR (pred bibo, crrc:$cc))>; def : InstAlias<"b"#name#"ctr"#pm, - (BCCCTR bibo, CR0)>; + (BCCCTR (pred bibo, CR0))>; def : InstAlias<"b"#name#"l"#pm#" $cc, $dst", - (BCCL bibo, crrc:$cc, condbrtarget:$dst)>; + (BCCL (pred bibo, crrc:$cc), condbrtarget:$dst)>; def : InstAlias<"b"#name#"l"#pm#" $dst", - (BCCL bibo, CR0, condbrtarget:$dst)>; + (BCCL (pred bibo, CR0), condbrtarget:$dst)>; def : InstAlias<"b"#name#"la"#pm#" $cc, $dst", - (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>; + (BCCLA (pred bibo, crrc:$cc), abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"la"#pm#" $dst", - (BCCLA bibo, CR0, abscondbrtarget:$dst)>; + (BCCLA (pred bibo, CR0), abscondbrtarget:$dst)>; def : InstAlias<"b"#name#"lrl"#pm#" $cc", - (BCCLRL bibo, crrc:$cc)>; + (BCCLRL (pred bibo, crrc:$cc))>; def : InstAlias<"b"#name#"lrl"#pm, - (BCCLRL bibo, CR0)>; + (BCCLRL (pred bibo, CR0))>; def : InstAlias<"b"#name#"ctrl"#pm#" $cc", - (BCCCTRL bibo, crrc:$cc)>; + (BCCCTRL (pred bibo, crrc:$cc))>; def : InstAlias<"b"#name#"ctrl"#pm, - (BCCCTRL bibo, CR0)>; + (BCCCTRL (pred bibo, CR0))>; } multiclass BranchExtendedMnemonic<string name, int bibo> { defm : BranchExtendedMnemonicPM<name, "", bibo>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td index 436715a..b38dd4a 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td +++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td @@ -14,7 +14,7 @@ multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, def PP : XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in { def NAME#W : @@ -24,7 +24,7 @@ multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, def WPP : XX3Form_AT3_XAB6<opcode, xo, (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -48,7 +48,7 @@ multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { def PM#NAME#W : @@ -65,7 +65,7 @@ multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -89,7 +89,7 @@ multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { def PM#NAME#W : @@ -106,7 +106,7 @@ multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -129,7 +129,7 @@ multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { def PM#NAME#W : @@ -145,7 +145,7 @@ multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -162,7 +162,7 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, XX3Form_AT3_XAB6< opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in { def PM#NAME : @@ -179,7 +179,7 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in { def NAME#W : @@ -190,7 +190,7 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, XX3Form_AT3_XAB6< opcode, !or(xo, 0x20), (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { def PM#NAME#W : @@ -207,7 +207,7 @@ multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase, !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -220,29 +220,29 @@ multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, def PN : XX3Form_AT3_XAB6< opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def NP : XX3Form_AT3_XAB6< opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def NN : XX3Form_AT3_XAB6< opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in { def WPN : XX3Form_AT3_XAB6< opcode, !or(xo, 0x80), (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def WNP : XX3Form_AT3_XAB6< opcode, !or(xo, 0x40), (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def WNN : XX3Form_AT3_XAB6< opcode, !or(xo, 0xC0), (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in { def PM#NAME#PN : @@ -251,21 +251,21 @@ multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#NP : MMIRR_XX3Form_XY4P2_XAB6< opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#NN : MMIRR_XX3Form_XY4P2_XAB6< opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { def PM#NAME#WPN : @@ -274,21 +274,21 @@ multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WNP : MMIRR_XX3Form_XY4P2_XAB6< opcode, !or(xo, 0x40), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WNN : MMIRR_XX3Form_XY4P2_XAB6< opcode, !or(xo, 0xC0), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -301,29 +301,29 @@ multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in { def WPN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def WNP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def WNN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs wacc:$AT), !con((ins wacc:$ATi), IOL), !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -346,28 +346,28 @@ multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#PN : MMIRR_XX3Form_XY4_XAB6< opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#NP : MMIRR_XX3Form_XY4_XAB6< opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#NN : MMIRR_XX3Form_XY4_XAB6< opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { def PM#NAME#W : @@ -383,28 +383,28 @@ multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WPN : MMIRR_XX3Form_XY4_XAB6< opcode, !or(xo, 0x80), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WNP : MMIRR_XX3Form_XY4_XAB6< opcode, !or(xo, 0x40), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WNN : MMIRR_XX3Form_XY4_XAB6< opcode, !or(xo, 0xC0), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -427,28 +427,28 @@ multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#PN : MMIRR_XX3Form_X4Y2_XAB6< opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#NP : MMIRR_XX3Form_X4Y2_XAB6< opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#NN : MMIRR_XX3Form_X4Y2_XAB6< opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { def PM#NAME#W : @@ -464,28 +464,28 @@ multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL, !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WPN : MMIRR_XX3Form_X4Y2_XAB6< opcode, !or(xo, 0x80), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WNP : MMIRR_XX3Form_X4Y2_XAB6< opcode, !or(xo, 0x40), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def PM#NAME#WNN : MMIRR_XX3Form_X4Y2_XAB6< opcode, !or(xo, 0xC0), (outs wacc:$AT), !con((ins wacc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } } @@ -497,12 +497,12 @@ let Predicates = [MMA, IsNotISAFuture] in { XForm_AT3<31, 0, 177, (outs acc:$ATo), (ins acc:$AT), "xxmfacc $AT", IIC_VecGeneral, [(set v512i1:$ATo, (int_ppc_mma_xxmfacc v512i1:$AT))]>, - RegConstraint<"$ATo = $AT">, NoEncode<"$ATo">; + RegConstraint<"$ATo = $AT">; def XXMTACC : XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", IIC_VecGeneral, [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), "#KILL_PAIR", []>, RegConstraint<"$XTp = $XSp">; @@ -519,7 +519,7 @@ let Predicates = [MMA, IsNotISAFuture] in { def XVI8GER4SPP : XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; let mayStore = 1 in { def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst), "#SPILL_ACC", []>; @@ -544,11 +544,11 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in { def XXMFACCW : XForm_AT3<31, 0, 177, (outs wacc:$ATo), (ins wacc:$AT), "xxmfacc $AT", IIC_VecGeneral, []>, - RegConstraint<"$ATo = $AT">, NoEncode<"$ATo">; + RegConstraint<"$ATo = $AT">; def XXMTACCW : XForm_AT3<31, 1, 177, (outs wacc:$AT), (ins wacc:$ATi), "xxmtacc $AT", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; let isAsCheapAsAMove = 1, isReMaterializable = 1 in { def DMXXSETACCZ : @@ -560,7 +560,7 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in { XX3Form_AT3_XAB6<59, 99, (outs wacc:$AT), (ins wacc:$ATi, vsrc:$XA, vsrc:$XB), "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; let mayStore = 1 in { def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst), @@ -593,7 +593,7 @@ let Predicates = [MMA, PrefixInstrs, IsNotISAFuture] in { u4imm:$YMSK, u4imm:$PMSK), "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { @@ -603,7 +603,7 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture], isCodeGenOnly = 1 in { u4imm:$YMSK, u4imm:$PMSK), "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + RegConstraint<"$ATi = $AT">; } // MMA accumulating/non-accumulating instructions. diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 1dc485d..3a9b64c8 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -117,7 +117,6 @@ def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX, class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : Instruction { field bits<64> Inst; - field bits<64> SoftFail = 0; bit PCRel = 0; // Default value, set by isPCRel. let Size = 8; @@ -383,14 +382,14 @@ class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr, class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern> : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VDi, gprc:$VA, vrrc:$VB), !strconcat(opc, " $VD, $VA, $VB"), IIC_VecGeneral, pattern>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; // VX-Form: [PO VRT RA RB XO]. // Destructive (insert) forms are suffixed with _ins. class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern> : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VDi, gprc:$VA, gprc:$VB), !strconcat(opc, " $VD, $VA, $VB"), IIC_VecGeneral, pattern>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; // VX-Form: [ PO BF // VRA VRB XO ] class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr, @@ -1396,7 +1395,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [P [(set v2i64:$XT, (PPCxxsplti32dx v2i64:$XTi, i32:$IX, i32:$IMM32))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + RegConstraint<"$XTi = $XT">; } let Predicates = [IsISA3_1] in { @@ -1467,13 +1466,13 @@ let Predicates = [IsISA3_1] in { "vinsw $VD, $VB, $VA", IIC_VecGeneral, [(set v4i32:$VD, (int_ppc_altivec_vinsw v4i32:$VDi, i32:$VB, timm:$VA))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VINSD : VXForm_1<463, (outs vrrc:$VD), (ins vrrc:$VDi, u4imm:$VA, g8rc:$VB), "vinsd $VD, $VB, $VA", IIC_VecGeneral, [(set v2i64:$VD, (int_ppc_altivec_vinsd v2i64:$VDi, i64:$VB, timm:$VA))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VINSBVLX : VXForm_VTB5_RA5_ins<15, "vinsbvlx", [(set v16i8:$VD, @@ -1539,13 +1538,13 @@ let Predicates = [IsISA3_1] in { "vinsdlx $VD, $VA, $VB", IIC_VecGeneral, [(set v2i64:$VD, (int_ppc_altivec_vinsdlx v2i64:$VDi, i64:$VA, i64:$VB))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VINSDRX : VXForm_1<975, (outs vrrc:$VD), (ins vrrc:$VDi, g8rc:$VA, g8rc:$VB), "vinsdrx $VD, $VA, $VB", IIC_VecGeneral, [(set v2i64:$VD, (int_ppc_altivec_vinsdrx v2i64:$VDi, i64:$VA, i64:$VB))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$VD), (ins vrrc:$VB), "vextractbm $VD, $VB", IIC_VecGeneral, [(set i32:$VD, @@ -1916,10 +1915,11 @@ let Predicates = [IsISA3_1] in { [(set v1i128:$VD, (int_ppc_altivec_vrlqmi v1i128:$VA, v1i128:$VB, v1i128:$VDi))]>, - RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">; + RegConstraint<"$VDi = $VD">; def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>; def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>; - def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>; + def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", + [(set v4i32:$VD, (PPCvsrq v4i32:$VA, v4i32:$VB))]>; def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>; def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>; def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>; @@ -2054,6 +2054,9 @@ let Predicates = [IsISA3_1, HasFPU] in { //---------------------------- Anonymous Patterns ----------------------------// let Predicates = [IsISA3_1] in { + // Exploit vsrq instruction to optimize VSR(VSRO (input, vsro_byte_shift), vsr_bit_shift) + // to VSRQ(input, vsrq_bit_shift) + def : Pat<(VSRVSRO v4i32:$vA, v4i32:$vB), (VSRQ $vA, $vB)>; // Exploit the vector multiply high instructions using intrinsics. def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)), (v4i32 (VMULHSW $vA, $vB))>; @@ -2175,10 +2178,7 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in { // - Other vector types [v16i8, v8i16] require COPY_TO_REGCLASS to/from VRRC // ============================================================================= -class XXEvalPattern<dag pattern, bits<8> imm> - : Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {} - -class XXEvalPatterns<ValueType Vt, dag InputPattern, bits<8> Imm> +class XXEvalPattern<ValueType Vt, dag InputPattern, bits<8> Imm> : Pat<(Vt InputPattern), !if(!or(!eq(Vt, v4i32), !eq(Vt, v2i64)), // VSRC path: direct XXEVAL for v4i32 and v2i64 @@ -2234,6 +2234,13 @@ def VEqv (v4i32(bitconvert node:$a)), (v4i32(bitconvert node:$b)))))]>; +// Vector NAND operation (not(and)) +def VNand + : PatFrags<(ops node:$a, node:$b), [(vnot(and node:$a, node:$b)), + (bitconvert(vnot(and + (v4i32(bitconvert node:$a)), + (v4i32(bitconvert node:$b)))))]>; + // ============================================================================= // XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectAnd // This class matches the equivalent Ternary Operation: A ? f(B,C) : AND(B,C) @@ -2246,29 +2253,79 @@ def VEqv // ============================================================================= multiclass XXEvalTernarySelectAnd<ValueType Vt> { // Pattern: A ? XOR(B,C) : AND(B,C) XXEVAL immediate value: 22 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VXor Vt:$vB, Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 22>; // Pattern: A ? NOR(B,C) : AND(B,C) XXEVAL immediate value: 24 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 24>; // Pattern: A ? EQV(B,C) : AND(B,C) XXEVAL immediate value: 25 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 25>; // Pattern: A ? NOT(C) : AND(B,C) XXEVAL immediate value: 26 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VNot Vt:$vC), (VAnd Vt:$vB, Vt:$vC)), 26>; // Pattern: A ? NOT(B) : AND(B,C) XXEVAL immediate value: 28 - def : XXEvalPatterns< + def : XXEvalPattern< Vt, (vselect Vt:$vA, (VNot Vt:$vB), (VAnd Vt:$vB, Vt:$vC)), 28>; } +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectB +// This class matches the equivalent Ternary Operation: A ? f(B,C) : B +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op on vectors B and C (AND, NOR, EQV, NAND) +// - B is the "false" case operand (vector B) +// +// Note: Patterns (A? C : B) and (A? not(C) : B) are not considered +// for XXEVAL instruction (4 Cycle) as XXSEL (3 cycle) instruction performs +// better. +// ============================================================================= +multiclass XXEvalTernarySelectB<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : B) XXEVAL immediate value: 49 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), Vt:$vB), 49>; + // Pattern: (A ? NOR(B,C) : B) XXEVAL immediate value: 56 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), Vt:$vB), 56>; + // Pattern: (A ? EQV(B,C) : B) XXEVAL immediate value: 57 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), Vt:$vB), 57>; + // Pattern: (A ? NAND(B,C) : B) XXEVAL immediate value: 62 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), Vt:$vB), 62>; +} + +// ============================================================================= +// XXEVAL Ternary Pattern Multiclass: XXEvalTernarySelectC +// This class matches the equivalent Ternary Operation: A ? f(B,C) : C +// and emit the corresponding xxeval instruction with the imm value. +// +// The patterns implement xxeval vector select operations where: +// - A is the selector vector +// - f(B,C) is the "true" case op on vectors B and C (AND, NOR, EQV, NAND) +// - C is the "false" case operand (vector C) +// +// Note: Patterns (A? B : C) and (A? not(B) : C) are not considered +// for XXEVAL instruction (4 Cycle) as XXSEL (3 cycle) instruction performs +// better. +// ============================================================================= +multiclass XXEvalTernarySelectC<ValueType Vt>{ + // Pattern: (A ? AND(B,C) : C) XXEVAL immediate value: 81 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VAnd Vt:$vB, Vt:$vC), Vt:$vC), 81>; + // Pattern: (A ? NOR(B,C) : C) XXEVAL immediate value: 88 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNor Vt:$vB, Vt:$vC), Vt:$vC), 88>; + // Pattern: (A ? EQV(B,C) : C) XXEVAL immediate value: 89 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VEqv Vt:$vB, Vt:$vC), Vt:$vC), 89>; + // Pattern: (A ? NAND(B,C) : C) XXEVAL immediate value: 94 + def : XXEvalPattern<Vt, (vselect Vt:$vA, (VNand Vt:$vB, Vt:$vC), Vt:$vC), 94>; +} + let Predicates = [PrefixInstrs, HasP10Vector] in { let AddedComplexity = 400 in { def : Pat<(v4i32 (build_vector i32immNonAllOneNonZero:$A, @@ -2299,87 +2356,89 @@ let Predicates = [PrefixInstrs, HasP10Vector] in { // Anonymous patterns for XXEVAL // AND // and(A, B, C) - def : XXEvalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>; // and(A, xor(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>; // and(A, or(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>; // and(A, nor(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>; // and(A, eqv(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>; // and(A, nand(B, C)) - def : XXEvalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>; + def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>; // NAND // nand(A, B, C) - def : XXEvalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), !sub(255, 1)>; // nand(A, xor(B, C)) - def : XXEvalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), !sub(255, 6)>; // nand(A, or(B, C)) - def : XXEvalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), + def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), !sub(255, 7)>; // nand(A, nor(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), !sub(255, 8)>; // nand(A, eqv(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), !sub(255, 9)>; // nand(A, nand(B, C)) - def : XXEvalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), !sub(255, 14)>; // EQV // (eqv A, B, C) - def : XXEvalPattern<(or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), + def : XXEvalPattern<v4i32, (or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))), 150>; // (eqv A, (and B, C)) - def : XXEvalPattern<(vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>; + def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>; // (eqv A, (or B, C)) - def : XXEvalPattern<(vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>; + def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>; // NOR // (nor A, B, C) - def : XXEvalPattern<(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>; + def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>; // (nor A, (and B, C)) - def : XXEvalPattern<(vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>; + def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>; // (nor A, (eqv B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>; + def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>; // (nor A, (nand B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>; + def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>; // (nor A, (nor B, C)) - def : XXEvalPattern<(and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>; + def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>; // (nor A, (xor B, C)) - def : XXEvalPattern<(vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>; + def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>; // OR // (or A, B, C) - def : XXEvalPattern<(or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>; // (or A, (and B, C)) - def : XXEvalPattern<(or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>; // (or A, (eqv B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>; // (or A, (nand B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>; // (or A, (nor B, C)) - def : XXEvalPattern<(or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>; // (or A, (xor B, C)) - def : XXEvalPattern<(or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>; + def : XXEvalPattern<v4i32, (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>; // XOR // (xor A, B, C) - def : XXEvalPattern<(xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>; + def : XXEvalPattern<v4i32, (xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>; // (xor A, (and B, C)) - def : XXEvalPattern<(xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>; + def : XXEvalPattern<v4i32, (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>; // (xor A, (or B, C)) - def : XXEvalPattern<(xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>; + def : XXEvalPattern<v4i32, (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>; // XXEval Patterns for ternary Operations. foreach Ty = [v4i32, v2i64, v8i16, v16i8] in { defm : XXEvalTernarySelectAnd<Ty>; + defm : XXEvalTernarySelectB<Ty>; + defm : XXEvalTernarySelectC<Ty>; } // Anonymous patterns to select prefixed VSX loads and stores. diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 1944821..4e5165b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -236,7 +236,7 @@ class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc, list<dag> pattern> : XForm_1<opcode, xo, (outs vrrc:$RST), (ins vrrc:$RSTi, vrrc:$RA, vrrc:$RB), !strconcat(opc, " $RST, $RA, $RB"), IIC_VecFP, pattern>, - RegConstraint<"$RSTi = $RST">, NoEncode<"$RSTi">; + RegConstraint<"$RSTi = $RST">; // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /] class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc, @@ -402,13 +402,13 @@ let hasSideEffects = 0 in { (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmaddadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (any_fma f64:$XA, f64:$XB, f64:$XTi))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMADDMDP : XX3Form<60, 41, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -418,13 +418,13 @@ let hasSideEffects = 0 in { (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmsubadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSMSUBMDP : XX3Form<60, 57, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -434,13 +434,13 @@ let hasSideEffects = 0 in { (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmaddadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, f64:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMADDMDP : XX3Form<60, 169, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -450,13 +450,13 @@ let hasSideEffects = 0 in { (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmsubadp $XT, $XA, $XB", IIC_VecFP, [(set f64:$XT, (fneg (any_fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XSNMSUBMDP : XX3Form<60, 185, (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB), "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -466,13 +466,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMADDMDP : XX3Form<60, 105, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -482,13 +482,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMADDMSP : XX3Form<60, 73, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -498,13 +498,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmsubadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMSUBMDP : XX3Form<60, 121, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -514,13 +514,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmsubasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVMSUBMSP : XX3Form<60, 89, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -530,13 +530,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMADDMDP : XX3Form<60, 233, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -546,13 +546,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMADDMSP : XX3Form<60, 201, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -562,13 +562,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubadp $XT, $XA, $XB", IIC_VecFP, [(set v2f64:$XT, (fneg (any_fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMSUBMDP : XX3Form<60, 249, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -578,13 +578,13 @@ let hasSideEffects = 0 in { (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubasp $XT, $XA, $XB", IIC_VecFP, [(set v4f32:$XT, (fneg (any_fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; let IsVSXFMAAlt = 1 in def XVNMSUBMSP : XX3Form<60, 217, (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB), "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -1199,7 +1199,7 @@ let Predicates = [HasVSX, HasP8Vector] in { (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsmaddasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (any_fma f32:$XA, f32:$XB, f32:$XTi))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; // FIXME: Setting the hasSideEffects flag here to match current behaviour. let IsVSXFMAAlt = 1, hasSideEffects = 1 in @@ -1207,7 +1207,7 @@ let Predicates = [HasVSX, HasP8Vector] in { (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -1219,7 +1219,7 @@ let Predicates = [HasVSX, HasP8Vector] in { "xsmsubasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (any_fma f32:$XA, f32:$XB, (fneg f32:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; // FIXME: Setting the hasSideEffects flag here to match current behaviour. let IsVSXFMAAlt = 1, hasSideEffects = 1 in @@ -1227,7 +1227,7 @@ let Predicates = [HasVSX, HasP8Vector] in { (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -1239,7 +1239,7 @@ let Predicates = [HasVSX, HasP8Vector] in { "xsnmaddasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB, f32:$XTi)))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; // FIXME: Setting the hasSideEffects flag here to match current behaviour. let IsVSXFMAAlt = 1, hasSideEffects = 1 in @@ -1247,7 +1247,7 @@ let Predicates = [HasVSX, HasP8Vector] in { (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -1259,7 +1259,7 @@ let Predicates = [HasVSX, HasP8Vector] in { "xsnmsubasp $XT, $XA, $XB", IIC_VecFP, [(set f32:$XT, (fneg (any_fma f32:$XA, f32:$XB, (fneg f32:$XTi))))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; // FIXME: Setting the hasSideEffects flag here to match current behaviour. let IsVSXFMAAlt = 1, hasSideEffects = 1 in @@ -1267,7 +1267,7 @@ let Predicates = [HasVSX, HasP8Vector] in { (outs vssrc:$XT), (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB), "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, AltVSXFMARel; } @@ -1563,7 +1563,7 @@ let Predicates = [HasVSX, HasP9Vector] in { "xxinsertw $XT, $XB, $UIM5", IIC_VecFP, [(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB, imm32SExt16:$UIM5))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + RegConstraint<"$XTi = $XT">; // Vector Extract Unsigned Word // FIXME: Setting the hasSideEffects flag here to match current behaviour. @@ -1652,11 +1652,11 @@ let Predicates = [HasVSX, HasP9Vector] in { def XXPERM : XX3Form<60, 26, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB), "xxperm $XT, $XA, $XB", IIC_VecPerm, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + RegConstraint<"$XTi = $XT">; def XXPERMR : XX3Form<60, 58, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB), "xxpermr $XT, $XA, $XB", IIC_VecPerm, []>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + RegConstraint<"$XTi = $XT">; // Vector Splat Immediate Byte def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8), diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 709d7e7..3640d25 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -80,7 +80,6 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" @@ -264,9 +263,8 @@ namespace { bool prepareBasesForCommoningChains(Bucket &BucketChain); /// Rewrite load/store according to the common chains. - bool - rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket, - SmallSet<BasicBlock *, 16> &BBChanged); + bool rewriteLoadStoresForCommoningChains( + Loop *L, Bucket &Bucket, SmallPtrSet<BasicBlock *, 16> &BBChanged); /// Collect condition matched(\p isValidCandidate() returns true) /// candidates in Loop \p L. @@ -309,7 +307,7 @@ namespace { /// Rewrite load/store instructions in \p BucketChain according to /// preparation. bool rewriteLoadStores(Loop *L, Bucket &BucketChain, - SmallSet<BasicBlock *, 16> &BBChanged, + SmallPtrSet<BasicBlock *, 16> &BBChanged, PrepForm Form); /// Rewrite for the base load/store of a chain. @@ -523,7 +521,7 @@ bool PPCLoopInstrFormPrep::chainCommoning(Loop *L, if (Buckets.empty()) return MadeChange; - SmallSet<BasicBlock *, 16> BBChanged; + SmallPtrSet<BasicBlock *, 16> BBChanged; for (auto &Bucket : Buckets) { if (prepareBasesForCommoningChains(Bucket)) @@ -537,7 +535,7 @@ bool PPCLoopInstrFormPrep::chainCommoning(Loop *L, } bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains( - Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) { + Loop *L, Bucket &Bucket, SmallPtrSet<BasicBlock *, 16> &BBChanged) { bool MadeChange = false; assert(Bucket.Elements.size() == @@ -1006,7 +1004,7 @@ bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) { } bool PPCLoopInstrFormPrep::rewriteLoadStores( - Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged, + Loop *L, Bucket &BucketChain, SmallPtrSet<BasicBlock *, 16> &BBChanged, PrepForm Form) { bool MadeChange = false; @@ -1089,7 +1087,7 @@ bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L, bool MadeChange = false; if (Buckets.empty()) return MadeChange; - SmallSet<BasicBlock *, 16> BBChanged; + SmallPtrSet<BasicBlock *, 16> BBChanged; for (auto &Bucket : Buckets) // The base address of each bucket is transformed into a phi and the others // are rewritten based on new base. @@ -1110,7 +1108,7 @@ bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, if (Buckets.empty()) return MadeChange; - SmallSet<BasicBlock *, 16> BBChanged; + SmallPtrSet<BasicBlock *, 16> BBChanged; for (auto &Bucket : Buckets) { if (Bucket.Elements.size() < DispFormPrepMinThreshold) continue; diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp index 0ffd35d..74bce43 100644 --- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp +++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp @@ -248,6 +248,10 @@ static bool splitMBB(BlockSplitInfo &BSI) { } addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI); + // Set the call frame size on ThisMBB to the new basic blocks. + // See https://reviews.llvm.org/D156113. + NewMBB->setCallFrameSize(TII->getCallFrameSizeAt(ThisMBB->back())); + LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump()); LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump()); LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump()); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 76dca47..f123040 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, SpillsKnownBit = true; break; default: + // When spilling a CR bit, the super register may not be explicitly defined + // (i.e. it can be defined by a CR-logical that only defines the subreg) so + // we state that the CR field is undef. Also, in order to preserve the kill + // flag on the CR bit, we add it as an implicit use. + // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all // bits (specifically, it produces a -1 if the CR bit is set). Ultimately, // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit // register), and SETNBC will set this. if (Subtarget.isISA3_1()) { BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg) - .addReg(SrcReg, RegState::Undef); + .addReg(SrcReg, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | + getKillRegState(MI.getOperand(0).isKill())); break; } @@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT || SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) { BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg) - .addReg(getCRFromCRBit(SrcReg), RegState::Undef); + .addReg(getCRFromCRBit(SrcReg), RegState::Undef) + .addReg(SrcReg, RegState::Implicit | + getKillRegState(MI.getOperand(0).isKill())); break; } } // We need to move the CR field that contains the CR bit we are spilling. - // The super register may not be explicitly defined (i.e. it can be defined - // by a CR-logical that only defines the subreg) so we state that the CR - // field is undef. Also, in order to preserve the kill flag on the CR bit, - // we add it as an implicit use. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) .addReg(getCRFromCRBit(SrcReg), RegState::Undef) .addReg(SrcReg, diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index 95de9f3..4039fed 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -22,3 +22,9 @@ bool PPCSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { return Opcode >= PPCISD::FIRST_STRICTFP_OPCODE && Opcode <= PPCISD::LAST_STRICTFP_OPCODE; } + +std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemcmp( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, const CallInst *CI) const { + return DAG.getMemcmp(Chain, dl, Op1, Op2, Op3, CI); +} diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h index 08e2ddb..1537851 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -20,6 +20,11 @@ public: bool isTargetMemoryOpcode(unsigned Opcode) const override; bool isTargetStrictFPOpcode(unsigned Opcode) const override; + + std::pair<SDValue, SDValue> + EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Op1, SDValue Op2, SDValue Op3, + const CallInst *CI) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index b5c6ac1..ae92d5e 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -129,7 +129,7 @@ LLVMInitializePowerPCTarget() { initializePPCLoopInstrFormPrepPass(PR); initializePPCTOCRegDepsPass(PR); initializePPCEarlyReturnPass(PR); - initializePPCVSXCopyPass(PR); + initializePPCVSXWACCCopyPass(PR); initializePPCVSXFMAMutatePass(PR); initializePPCVSXSwapRemovalPass(PR); initializePPCReduceCRLogicalsPass(PR); @@ -528,7 +528,7 @@ bool PPCPassConfig::addInstSelector() { addPass(createPPCCTRLoopsVerify()); #endif - addPass(createPPCVSXCopyPass()); + addPass(createPPCVSXWACCCopyPass()); return false; } diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp deleted file mode 100644 index 794095c..0000000 --- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp +++ /dev/null @@ -1,159 +0,0 @@ -//===-------------- PPCVSXCopy.cpp - VSX Copy Legalization ----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A pass which deals with the complexity of generating legal VSX register -// copies to/from register classes which partially overlap with the VSX -// register file. -// -//===----------------------------------------------------------------------===// - -#include "PPC.h" -#include "PPCInstrInfo.h" -#include "PPCTargetMachine.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -#define DEBUG_TYPE "ppc-vsx-copy" - -namespace { - // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers - // (Altivec and scalar floating-point registers), we need to transform the - // copies into subregister copies with other restrictions. - struct PPCVSXCopy : public MachineFunctionPass { - static char ID; - PPCVSXCopy() : MachineFunctionPass(ID) {} - - const TargetInstrInfo *TII; - - bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC, - MachineRegisterInfo &MRI) { - if (Register::isVirtualRegister(Reg)) { - return RC->hasSubClassEq(MRI.getRegClass(Reg)); - } else if (RC->contains(Reg)) { - return true; - } - - return false; - } - - bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) { - return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI); - } - - bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) { - return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI); - } - - bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) { - return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI); - } - - bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) { - return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI); - } - - bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) { - return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI); - } - -protected: - bool processBlock(MachineBasicBlock &MBB) { - bool Changed = false; - - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - for (MachineInstr &MI : MBB) { - if (!MI.isFullCopy()) - continue; - - MachineOperand &DstMO = MI.getOperand(0); - MachineOperand &SrcMO = MI.getOperand(1); - - if ( IsVSReg(DstMO.getReg(), MRI) && - !IsVSReg(SrcMO.getReg(), MRI)) { - // This is a copy *to* a VSX register from a non-VSX register. - Changed = true; - - const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass; - assert((IsF8Reg(SrcMO.getReg(), MRI) || - IsVSSReg(SrcMO.getReg(), MRI) || - IsVSFReg(SrcMO.getReg(), MRI)) && - "Unknown source for a VSX copy"); - - Register NewVReg = MRI.createVirtualRegister(SrcRC); - BuildMI(MBB, MI, MI.getDebugLoc(), - TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg) - .addImm(1) // add 1, not 0, because there is no implicit clearing - // of the high bits. - .add(SrcMO) - .addImm(PPC::sub_64); - - // The source of the original copy is now the new virtual register. - SrcMO.setReg(NewVReg); - } else if (!IsVSReg(DstMO.getReg(), MRI) && - IsVSReg(SrcMO.getReg(), MRI)) { - // This is a copy *from* a VSX register to a non-VSX register. - Changed = true; - - const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass; - assert((IsF8Reg(DstMO.getReg(), MRI) || - IsVSFReg(DstMO.getReg(), MRI) || - IsVSSReg(DstMO.getReg(), MRI)) && - "Unknown destination for a VSX copy"); - - // Copy the VSX value into a new VSX register of the correct subclass. - Register NewVReg = MRI.createVirtualRegister(DstRC); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), - NewVReg) - .add(SrcMO); - - // Transform the original copy into a subregister extraction copy. - SrcMO.setReg(NewVReg); - SrcMO.setSubReg(PPC::sub_64); - } - } - - return Changed; - } - -public: - bool runOnMachineFunction(MachineFunction &MF) override { - // If we don't have VSX on the subtarget, don't do anything. - const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); - if (!STI.hasVSX()) - return false; - TII = STI.getInstrInfo(); - - bool Changed = false; - - for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) - if (processBlock(B)) - Changed = true; - - return Changed; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - MachineFunctionPass::getAnalysisUsage(AU); - } - }; - } // end anonymous namespace - -INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE, - "PowerPC VSX Copy Legalization", false, false) - -char PPCVSXCopy::ID = 0; -FunctionPass* -llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); } diff --git a/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp new file mode 100644 index 0000000..2ec566d --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCVSXWACCCopy.cpp @@ -0,0 +1,182 @@ +//===--------- PPCVSXWACCCopy.cpp - VSX and WACC Copy Legalization --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A pass which deals with the complexity of generating legal VSX register +// copies to/from register classes which partially overlap with the VSX +// register file and combines the wacc/wacc_hi copies when needed. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCInstrInfo.h" +#include "PPCTargetMachine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-vsx-copy" + +namespace { +// PPCVSXWACCCopy pass - For copies between VSX registers and non-VSX registers +// (Altivec and scalar floating-point registers), we need to transform the +// copies into subregister copies with other restrictions. +struct PPCVSXWACCCopy : public MachineFunctionPass { + static char ID; + PPCVSXWACCCopy() : MachineFunctionPass(ID) {} + + const TargetInstrInfo *TII; + + bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC, + MachineRegisterInfo &MRI) { + if (Register::isVirtualRegister(Reg)) { + return RC->hasSubClassEq(MRI.getRegClass(Reg)); + } else if (RC->contains(Reg)) { + return true; + } + + return false; + } + + bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI); + } + + bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI); + } + + bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI); + } + + bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI); + } + + bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI); + } + +protected: + bool processBlock(MachineBasicBlock &MBB) { + bool Changed = false; + + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + for (MachineInstr &MI : MBB) { + if (!MI.isFullCopy()) + continue; + + MachineOperand &DstMO = MI.getOperand(0); + MachineOperand &SrcMO = MI.getOperand(1); + + if (IsVSReg(DstMO.getReg(), MRI) && !IsVSReg(SrcMO.getReg(), MRI)) { + // This is a copy *to* a VSX register from a non-VSX register. + Changed = true; + + const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass; + assert((IsF8Reg(SrcMO.getReg(), MRI) || IsVSSReg(SrcMO.getReg(), MRI) || + IsVSFReg(SrcMO.getReg(), MRI)) && + "Unknown source for a VSX copy"); + + Register NewVReg = MRI.createVirtualRegister(SrcRC); + BuildMI(MBB, MI, MI.getDebugLoc(), + TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg) + .addImm(1) // add 1, not 0, because there is no implicit clearing + // of the high bits. + .add(SrcMO) + .addImm(PPC::sub_64); + + // The source of the original copy is now the new virtual register. + SrcMO.setReg(NewVReg); + } else if (!IsVSReg(DstMO.getReg(), MRI) && + IsVSReg(SrcMO.getReg(), MRI)) { + // This is a copy *from* a VSX register to a non-VSX register. + Changed = true; + + const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass; + assert((IsF8Reg(DstMO.getReg(), MRI) || IsVSFReg(DstMO.getReg(), MRI) || + IsVSSReg(DstMO.getReg(), MRI)) && + "Unknown destination for a VSX copy"); + + // Copy the VSX value into a new VSX register of the correct subclass. + Register NewVReg = MRI.createVirtualRegister(DstRC); + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), + NewVReg) + .add(SrcMO); + + // Transform the original copy into a subregister extraction copy. + SrcMO.setReg(NewVReg); + SrcMO.setSubReg(PPC::sub_64); + } else if (IsRegInClass(DstMO.getReg(), &PPC::WACC_HIRCRegClass, MRI) && + IsRegInClass(SrcMO.getReg(), &PPC::WACCRCRegClass, MRI)) { + // Matches the pattern: + // %a:waccrc = COPY %b.sub_wacc_hi:dmrrc + // %c:wacc_hirc = COPY %a:waccrc + // And replaces it with: + // %c:wacc_hirc = COPY %b.sub_wacc_hi:dmrrc + MachineInstr *DefMI = MRI.getUniqueVRegDef(SrcMO.getReg()); + if (!DefMI || !DefMI->isCopy()) + continue; + + MachineOperand &OrigSrc = DefMI->getOperand(1); + + if (!IsRegInClass(OrigSrc.getReg(), &PPC::DMRRCRegClass, MRI)) + continue; + + if (OrigSrc.getSubReg() != PPC::sub_wacc_hi) + continue; + + // Rewrite the second copy to use the original register's subreg + SrcMO.setReg(OrigSrc.getReg()); + SrcMO.setSubReg(PPC::sub_wacc_hi); + Changed = true; + + // Remove the intermediate copy if safe + if (MRI.use_nodbg_empty(DefMI->getOperand(0).getReg())) + DefMI->eraseFromParent(); + } + } + + return Changed; + } + +public: + bool runOnMachineFunction(MachineFunction &MF) override { + // If we don't have VSX on the subtarget, don't do anything. + const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); + if (!STI.hasVSX()) + return false; + TII = STI.getInstrInfo(); + + bool Changed = false; + + for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) + if (processBlock(B)) + Changed = true; + + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} // end anonymous namespace + +INITIALIZE_PASS(PPCVSXWACCCopy, DEBUG_TYPE, "PowerPC VSX Copy Legalization", + false, false) + +char PPCVSXWACCCopy::ID = 0; +FunctionPass *llvm::createPPCVSXWACCCopyPass() { return new PPCVSXWACCCopy(); } diff --git a/llvm/lib/Target/PowerPC/README_P9.txt b/llvm/lib/Target/PowerPC/README_P9.txt index ee1ea73..208c8ab 100644 --- a/llvm/lib/Target/PowerPC/README_P9.txt +++ b/llvm/lib/Target/PowerPC/README_P9.txt @@ -224,22 +224,22 @@ VSX: . isCommutable = 1 // xsmaddqp [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; // xsmsubqp [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; // xsnmaddqp [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; // xsnmsubqp [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; - Round to Odd of QP (Negative) Multiply-{Add/Subtract}: @@ -276,22 +276,22 @@ VSX: . isCommutable = 1 // xsmaddqpo [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; // xsmsubqpo [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; // xsnmaddqpo [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; // xsnmsubqpo [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>, - RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">, + RegConstraint<"$vTi = $vT">, AltVSXFMARel; - QP Compare Ordered/Unordered: xscmpoqp xscmpuqp @@ -405,7 +405,7 @@ Fixed Point Facility: But how to map to it?? [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>, - RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, + RegConstraint<"$XTi = $XT">, . Or use intrinsic? (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM)) |