diff options
Diffstat (limited to 'llvm/lib/Target/SystemZ')
| -rw-r--r-- | llvm/lib/Target/SystemZ/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZ.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 32 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 253 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 386 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 32 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZOperators.td | 279 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 29 | ||||
| -rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h | 2 |
15 files changed, 456 insertions, 636 deletions
diff --git a/llvm/lib/Target/SystemZ/CMakeLists.txt b/llvm/lib/Target/SystemZ/CMakeLists.txt index 0d8f3ea..6d94a75 100644 --- a/llvm/lib/Target/SystemZ/CMakeLists.txt +++ b/llvm/lib/Target/SystemZ/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM SystemZGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(SystemZCommonTableGen) diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp index 275165d..a24543b 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp @@ -218,7 +218,7 @@ void SystemZInstPrinterCommon::printBDXAddrOperand(const MCInst *MI, int OpNum, void SystemZInstPrinterCommon::printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - unsigned Base = MI->getOperand(OpNum).getReg(); + MCRegister Base = MI->getOperand(OpNum).getReg(); const MCOperand &DispMO = MI->getOperand(OpNum + 1); uint64_t Length = MI->getOperand(OpNum + 2).getImm(); printOperand(DispMO, &MAI, O); @@ -232,9 +232,9 @@ void SystemZInstPrinterCommon::printBDLAddrOperand(const MCInst *MI, int OpNum, void SystemZInstPrinterCommon::printBDRAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - unsigned Base = MI->getOperand(OpNum).getReg(); + MCRegister Base = MI->getOperand(OpNum).getReg(); const MCOperand &DispMO = MI->getOperand(OpNum + 1); - unsigned Length = MI->getOperand(OpNum + 2).getReg(); + MCRegister Length = MI->getOperand(OpNum + 2).getReg(); printOperand(DispMO, &MAI, O); O << "("; printRegName(O, Length); diff --git a/llvm/lib/Target/SystemZ/SystemZ.td 
b/llvm/lib/Target/SystemZ/SystemZ.td index ec11064..95f039d 100644 --- a/llvm/lib/Target/SystemZ/SystemZ.td +++ b/llvm/lib/Target/SystemZ/SystemZ.td @@ -57,6 +57,9 @@ include "SystemZInstrHFP.td" include "SystemZInstrDFP.td" include "SystemZInstrSystem.td" + +defm : RemapAllTargetPseudoPointerOperands<ADDR64Bit>; + def SystemZInstrInfo : InstrInfo { let guessInstructionProperties = 0; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index e31d7c6..f061272 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -1270,7 +1270,7 @@ void SystemZAsmPrinter::emitFunctionBodyEnd() { static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, bool StackProtector, bool FPRMask, bool VRMask, - bool EHBlock, bool HasName) { + bool EHBlock, bool HasArgAreaLength, bool HasName) { enum class PPA1Flag1 : uint8_t { DSA64Bit = (0x80 >> 0), VarArg = (0x80 >> 7), @@ -1282,8 +1282,9 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, LLVM_MARK_AS_BITMASK_ENUM(ExternalProcedure) }; enum class PPA1Flag3 : uint8_t { + HasArgAreaLength = (0x80 >> 1), FPRMask = (0x80 >> 2), - LLVM_MARK_AS_BITMASK_ENUM(FPRMask) + LLVM_MARK_AS_BITMASK_ENUM(HasArgAreaLength) }; enum class PPA1Flag4 : uint8_t { EPMOffsetPresent = (0x80 >> 0), @@ -1307,6 +1308,9 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, if (StackProtector) Flags2 |= PPA1Flag2::STACKPROTECTOR; + if (HasArgAreaLength) + Flags3 |= PPA1Flag3::HasArgAreaLength; // Add emit ArgAreaLength flag. + // SavedGPRMask, SavedFPRMask, and SavedVRMask are precomputed in. if (FPRMask) Flags3 |= PPA1Flag3::FPRMask; // Add emit FPR mask flag. 
@@ -1339,6 +1343,9 @@ static void emitPPA1Flags(std::unique_ptr<MCStreamer> &OutStreamer, bool VarArg, OutStreamer->emitInt8(static_cast<uint8_t>(Flags2)); // Flags 2. OutStreamer->AddComment("PPA1 Flags 3"); + if ((Flags3 & PPA1Flag3::HasArgAreaLength) == PPA1Flag3::HasArgAreaLength) + OutStreamer->AddComment( + " Bit 1: 1 = Argument Area Length is in optional area"); if ((Flags3 & PPA1Flag3::FPRMask) == PPA1Flag3::FPRMask) OutStreamer->AddComment(" Bit 2: 1 = FP Reg Mask is in optional area"); OutStreamer->emitInt8( @@ -1477,12 +1484,26 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { bool NeedEmitEHBlock = !MF->getLandingPads().empty(); + // Optional Argument Area Length. + // Note: This represents the length of the argument area that we reserve + // in our stack for setting up arguments for calls to other + // routines. If this optional field is not set, LE will reserve + // 128 bytes for the argument area. This optional field is + // created if greater than 128 bytes is required - to guarantee + // the required space is reserved on stack extension in the new + // extension. This optional field is also created if the + // routine has alloca(). This may reduce stack space + // if alloca() call causes a stack extension. 
+ bool HasArgAreaLength = + (AllocaReg != 0) || (MFFrame.getMaxCallFrameSize() > 128); + bool HasName = MF->getFunction().hasName() && MF->getFunction().getName().size() > 0; emitPPA1Flags(OutStreamer, MF->getFunction().isVarArg(), MFFrame.hasStackProtectorIndex(), SavedFPRMask != 0, - TargetHasVector && SavedVRMask != 0, NeedEmitEHBlock, HasName); + TargetHasVector && SavedVRMask != 0, NeedEmitEHBlock, + HasArgAreaLength, HasName); OutStreamer->AddComment("Length/4 of Parms"); OutStreamer->emitInt16( @@ -1490,6 +1511,11 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) { OutStreamer->AddComment("Length of Code"); OutStreamer->emitAbsoluteSymbolDiff(FnEndSym, CurrentFnEPMarkerSym, 4); + if (HasArgAreaLength) { + OutStreamer->AddComment("Argument Area Length"); + OutStreamer->emitInt32(MFFrame.getMaxCallFrameSize()); + } + // Emit saved FPR mask and offset to FPR save area (0x20 of flags 3). if (SavedFPRMask) { OutStreamer->AddComment("FPR mask"); diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index dcefff9..570bbd8 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -360,12 +360,12 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters( if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, Register()); + &SystemZ::FP64BitRegClass, Register()); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } } @@ -389,10 +389,10 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters( MCRegister Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, 
Register()); + &SystemZ::FP64BitRegClass, Register()); if (SystemZ::VR128BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } // Restore call-saved GPRs (but not call-clobbered varargs, which at @@ -1157,12 +1157,12 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters( if (SystemZ::FP64BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, Register()); + &SystemZ::FP64BitRegClass, Register()); } if (SystemZ::VR128BitRegClass.contains(Reg)) { MBB.addLiveIn(Reg); TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } } @@ -1189,10 +1189,10 @@ bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters( MCRegister Reg = I.getReg(); if (SystemZ::FP64BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::FP64BitRegClass, TRI, Register()); + &SystemZ::FP64BitRegClass, Register()); if (SystemZ::VR128BitRegClass.contains(Reg)) TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(), - &SystemZ::VR128BitRegClass, TRI, Register()); + &SystemZ::VR128BitRegClass, Register()); } // Restore call-saved GPRs (but not call-clobbered varargs, which at diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index 5313fba..8fc339f 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -115,11 +115,10 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { } bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { - const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); const MCInstrDesc &MID = MI->getDesc(); unsigned Count = 0; for (unsigned OpIdx = 0; 
OpIdx < MID.getNumOperands(); OpIdx++) { - const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI); + const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx); if (RC == nullptr) continue; if (OpIdx >= MID.getNumDefs() && diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 58109ac..23a3895 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -92,7 +92,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) - : TargetLowering(TM), Subtarget(STI) { + : TargetLowering(TM, STI), Subtarget(STI) { MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); auto *Regs = STI.getSpecialRegisters(); @@ -1970,6 +1970,28 @@ SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( return SDValue(); } +// The first part of a split stack argument is at index I in Args (and +// ArgLocs). Return the type of a part and the number of them by reference. 
+template <class ArgTy> +static bool analyzeArgSplit(const SmallVectorImpl<ArgTy> &Args, + SmallVector<CCValAssign, 16> &ArgLocs, unsigned I, + MVT &PartVT, unsigned &NumParts) { + if (!Args[I].Flags.isSplit()) + return false; + assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() && + "ArgLocs havoc."); + PartVT = ArgLocs[I].getValVT(); + NumParts = 1; + for (unsigned PartIdx = I + 1;; ++PartIdx) { + assert(PartIdx != ArgLocs.size() && "SplitEnd not found."); + assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split."); + ++NumParts; + if (Args[PartIdx].Flags.isSplitEnd()) + break; + } + return true; +} + SDValue SystemZTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, @@ -2074,16 +2096,26 @@ SDValue SystemZTargetLowering::LowerFormalArguments( MachinePointerInfo())); // If the original argument was split (e.g. i128), we need // to load all parts of it here (using the same address). - unsigned ArgIndex = Ins[I].OrigArgIndex; - assert (Ins[I].PartOffset == 0); - while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { - CCValAssign &PartVA = ArgLocs[I + 1]; - unsigned PartOffset = Ins[I + 1].PartOffset; - SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, - DAG.getIntPtrConstant(PartOffset, DL)); - InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, - MachinePointerInfo())); - ++I; + MVT PartVT; + unsigned NumParts; + if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) { + // TODO: It is strange that while LowerCallTo() sets the PartOffset + // relative to the first split part LowerArguments() sets the offset + // from the beginning of the struct. So with {i32, i256}, the + // PartOffset for the i256 parts are differently handled. Try to + // remove that difference and use PartOffset directly here (instead + // of SplitBaseOffs). 
+ unsigned SplitBaseOffs = Ins[I].PartOffset; + for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) { + ++I; + CCValAssign &PartVA = ArgLocs[I]; + unsigned PartOffset = Ins[I].PartOffset - SplitBaseOffs; + SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, + DAG.getIntPtrConstant(PartOffset, DL)); + InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, + MachinePointerInfo())); + assert(PartOffset && "Offset should be non-zero."); + } } } else InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); @@ -2319,18 +2351,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - unsigned ArgIndex = Outs[I].OrigArgIndex; EVT SlotVT; - if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { - // Allocate the full stack space for a promoted (and split) argument. - Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; - EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); - MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); - unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); - SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); - } else { + MVT PartVT; + unsigned NumParts = 1; + if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts)) + SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts); + else SlotVT = Outs[I].VT; - } SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); MemOpChains.push_back( @@ -2338,18 +2365,19 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, MachinePointerInfo::getFixedStack(MF, FI))); // If the original argument was split (e.g. i128), we need // to store all parts of it here (and pass just one address). 
- assert (Outs[I].PartOffset == 0); - while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { - SDValue PartValue = OutVals[I + 1]; - unsigned PartOffset = Outs[I + 1].PartOffset; + assert(Outs[I].PartOffset == 0); + for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) { + ++I; + SDValue PartValue = OutVals[I]; + unsigned PartOffset = Outs[I].PartOffset; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, DAG.getIntPtrConstant(PartOffset, DL)); MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); + assert(PartOffset && "Offset should be non-zero."); assert((PartOffset + PartValue.getValueType().getStoreSize() <= SlotVT.getStoreSize()) && "Not enough space for argument part!"); - ++I; } ArgValue = SpillSlot; } else @@ -2534,7 +2562,7 @@ bool SystemZTargetLowering::CanLowerReturn( // Special case that we cannot easily detect in RetCC_SystemZ since // i128 may not be a legal type. for (auto &Out : Outs) - if (Out.ArgVT == MVT::i128) + if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64) return false; SmallVector<CCValAssign, 16> RetLocs; @@ -7423,153 +7451,6 @@ SystemZTargetLowering::ReplaceNodeResults(SDNode *N, return LowerOperationWrapper(N, Results, DAG); } -const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { -#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME - switch ((SystemZISD::NodeType)Opcode) { - case SystemZISD::FIRST_NUMBER: break; - OPCODE(RET_GLUE); - OPCODE(CALL); - OPCODE(SIBCALL); - OPCODE(TLS_GDCALL); - OPCODE(TLS_LDCALL); - OPCODE(PCREL_WRAPPER); - OPCODE(PCREL_OFFSET); - OPCODE(ICMP); - OPCODE(FCMP); - OPCODE(STRICT_FCMP); - OPCODE(STRICT_FCMPS); - OPCODE(TM); - OPCODE(BR_CCMASK); - OPCODE(SELECT_CCMASK); - OPCODE(ADJDYNALLOC); - OPCODE(PROBED_ALLOCA); - OPCODE(POPCNT); - OPCODE(SMUL_LOHI); - OPCODE(UMUL_LOHI); - OPCODE(SDIVREM); - OPCODE(UDIVREM); - OPCODE(SADDO); - OPCODE(SSUBO); - OPCODE(UADDO); - 
OPCODE(USUBO); - OPCODE(ADDCARRY); - OPCODE(SUBCARRY); - OPCODE(GET_CCMASK); - OPCODE(MVC); - OPCODE(NC); - OPCODE(OC); - OPCODE(XC); - OPCODE(CLC); - OPCODE(MEMSET_MVC); - OPCODE(STPCPY); - OPCODE(STRCMP); - OPCODE(SEARCH_STRING); - OPCODE(IPM); - OPCODE(TBEGIN); - OPCODE(TBEGIN_NOFLOAT); - OPCODE(TEND); - OPCODE(BYTE_MASK); - OPCODE(ROTATE_MASK); - OPCODE(REPLICATE); - OPCODE(JOIN_DWORDS); - OPCODE(SPLAT); - OPCODE(MERGE_HIGH); - OPCODE(MERGE_LOW); - OPCODE(SHL_DOUBLE); - OPCODE(PERMUTE_DWORDS); - OPCODE(PERMUTE); - OPCODE(PACK); - OPCODE(PACKS_CC); - OPCODE(PACKLS_CC); - OPCODE(UNPACK_HIGH); - OPCODE(UNPACKL_HIGH); - OPCODE(UNPACK_LOW); - OPCODE(UNPACKL_LOW); - OPCODE(VSHL_BY_SCALAR); - OPCODE(VSRL_BY_SCALAR); - OPCODE(VSRA_BY_SCALAR); - OPCODE(VROTL_BY_SCALAR); - OPCODE(SHL_DOUBLE_BIT); - OPCODE(SHR_DOUBLE_BIT); - OPCODE(VSUM); - OPCODE(VACC); - OPCODE(VSCBI); - OPCODE(VAC); - OPCODE(VSBI); - OPCODE(VACCC); - OPCODE(VSBCBI); - OPCODE(VMAH); - OPCODE(VMALH); - OPCODE(VME); - OPCODE(VMLE); - OPCODE(VMO); - OPCODE(VMLO); - OPCODE(VICMPE); - OPCODE(VICMPH); - OPCODE(VICMPHL); - OPCODE(VICMPES); - OPCODE(VICMPHS); - OPCODE(VICMPHLS); - OPCODE(VFCMPE); - OPCODE(STRICT_VFCMPE); - OPCODE(STRICT_VFCMPES); - OPCODE(VFCMPH); - OPCODE(STRICT_VFCMPH); - OPCODE(STRICT_VFCMPHS); - OPCODE(VFCMPHE); - OPCODE(STRICT_VFCMPHE); - OPCODE(STRICT_VFCMPHES); - OPCODE(VFCMPES); - OPCODE(VFCMPHS); - OPCODE(VFCMPHES); - OPCODE(VFTCI); - OPCODE(VEXTEND); - OPCODE(STRICT_VEXTEND); - OPCODE(VROUND); - OPCODE(STRICT_VROUND); - OPCODE(VTM); - OPCODE(SCMP128HI); - OPCODE(UCMP128HI); - OPCODE(VFAE_CC); - OPCODE(VFAEZ_CC); - OPCODE(VFEE_CC); - OPCODE(VFEEZ_CC); - OPCODE(VFENE_CC); - OPCODE(VFENEZ_CC); - OPCODE(VISTR_CC); - OPCODE(VSTRC_CC); - OPCODE(VSTRCZ_CC); - OPCODE(VSTRS_CC); - OPCODE(VSTRSZ_CC); - OPCODE(TDC); - OPCODE(ATOMIC_SWAPW); - OPCODE(ATOMIC_LOADW_ADD); - OPCODE(ATOMIC_LOADW_SUB); - OPCODE(ATOMIC_LOADW_AND); - OPCODE(ATOMIC_LOADW_OR); - OPCODE(ATOMIC_LOADW_XOR); - 
OPCODE(ATOMIC_LOADW_NAND); - OPCODE(ATOMIC_LOADW_MIN); - OPCODE(ATOMIC_LOADW_MAX); - OPCODE(ATOMIC_LOADW_UMIN); - OPCODE(ATOMIC_LOADW_UMAX); - OPCODE(ATOMIC_CMP_SWAPW); - OPCODE(ATOMIC_CMP_SWAP); - OPCODE(ATOMIC_LOAD_128); - OPCODE(ATOMIC_STORE_128); - OPCODE(ATOMIC_CMP_SWAP_128); - OPCODE(LRV); - OPCODE(STRV); - OPCODE(VLER); - OPCODE(VSTER); - OPCODE(STCKF); - OPCODE(PREFETCH); - OPCODE(ADA_ENTRY); - } - return nullptr; -#undef OPCODE -} - // Return true if VT is a vector whose elements are a whole number of bytes // in width. Also check for presence of vector support. bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { @@ -8820,15 +8701,17 @@ SmallVector<SDValue, 4> static simplifyAssumingCCVal(SDValue &Val, SDValue &CC, int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); - const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG); - const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG); - if (TrueSDVals.empty() || FalseSDVals.empty()) - return {}; + // Pruning search tree early - Moving CC test and combineCCMask ahead of + // recursive call to simplifyAssumingCCVal. 
SDValue Op4CCReg = Val.getOperand(4); if (Op4CCReg != CC) combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG); if (Op4CCReg != CC) return {}; + const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG); + const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG); + if (TrueSDVals.empty() || FalseSDVals.empty()) + return {}; SmallVector<SDValue, 4> MergedSDVals; for (auto &CCVal : {0, 1, 2, 3}) MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0) @@ -9004,7 +8887,12 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N, int CCMaskVal = CCMask->getZExtValue(); SDValue Chain = N->getOperand(0); SDValue CCReg = N->getOperand(4); - if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG)) + // If combineCCMask was able to merge or simplify ccvalid or ccmask, re-emit + // the modified BR_CCMASK with the new values. + // In order to avoid conditional branches with full or empty cc masks, do not + // do this if ccmask is 0 or equal to ccvalid. + if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 && + CCMaskVal != CCValidVal) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), @@ -9091,6 +8979,13 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK( IsCombinedCCReg = true; } } + // If the condition is trivially false or trivially true after + // combineCCMask, just collapse this SELECT_CCMASK to the indicated value + // (possibly modified by constructCCSDValsFromSELECT).
+ if (CCMaskVal == 0) + return FalseVal; + if (CCMaskVal == CCValidVal) + return TrueVal; if (IsCombinedCCReg) return DAG.getNode( diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index d5b7603..13a1cd1 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -22,390 +22,6 @@ #include <optional> namespace llvm { -namespace SystemZISD { -enum NodeType : unsigned { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - // Return with a glue operand. Operand 0 is the chain operand. - RET_GLUE, - - // Calls a function. Operand 0 is the chain operand and operand 1 - // is the target address. The arguments start at operand 2. - // There is an optional glue operand at the end. - CALL, - SIBCALL, - - // TLS calls. Like regular calls, except operand 1 is the TLS symbol. - // (The call target is implicitly __tls_get_offset.) - TLS_GDCALL, - TLS_LDCALL, - - // Wraps a TargetGlobalAddress that should be loaded using PC-relative - // accesses (LARL). Operand 0 is the address. - PCREL_WRAPPER, - - // Used in cases where an offset is applied to a TargetGlobalAddress. - // Operand 0 is the full TargetGlobalAddress and operand 1 is a - // PCREL_WRAPPER for an anchor point. This is used so that we can - // cheaply refer to either the full address or the anchor point - // as a register base. - PCREL_OFFSET, - - // Integer comparisons. There are three operands: the two values - // to compare, and an integer of type SystemZICMP. - ICMP, - - // Floating-point comparisons. The two operands are the values to compare. - FCMP, - - // Test under mask. The first operand is ANDed with the second operand - // and the condition codes are set on the result. The third operand is - // a boolean that is true if the condition codes need to distinguish - // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the - // register forms do but the memory forms don't). 
- TM, - - // Branches if a condition is true. Operand 0 is the chain operand; - // operand 1 is the 4-bit condition-code mask, with bit N in - // big-endian order meaning "branch if CC=N"; operand 2 is the - // target block and operand 3 is the flag operand. - BR_CCMASK, - - // Selects between operand 0 and operand 1. Operand 2 is the - // mask of condition-code values for which operand 0 should be - // chosen over operand 1; it has the same form as BR_CCMASK. - // Operand 3 is the flag operand. - SELECT_CCMASK, - - // Evaluates to the gap between the stack pointer and the - // base of the dynamically-allocatable area. - ADJDYNALLOC, - - // For allocating stack space when using stack clash protector. - // Allocation is performed by block, and each block is probed. - PROBED_ALLOCA, - - // Count number of bits set in operand 0 per byte. - POPCNT, - - // Wrappers around the ISD opcodes of the same name. The output is GR128. - // Input operands may be GR64 or GR32, depending on the instruction. - SMUL_LOHI, - UMUL_LOHI, - SDIVREM, - UDIVREM, - - // Add/subtract with overflow/carry. These have the same operands as - // the corresponding standard operations, except with the carry flag - // replaced by a condition code value. - SADDO, SSUBO, UADDO, USUBO, ADDCARRY, SUBCARRY, - - // Set the condition code from a boolean value in operand 0. - // Operand 1 is a mask of all condition-code values that may result of this - // operation, operand 2 is a mask of condition-code values that may result - // if the boolean is true. - // Note that this operation is always optimized away, we will never - // generate any code for it. - GET_CCMASK, - - // Use a series of MVCs to copy bytes from one memory location to another. - // The operands are: - // - the target address - // - the source address - // - the constant length - // - // This isn't a memory opcode because we'd need to attach two - // MachineMemOperands rather than one. 
- MVC, - - // Similar to MVC, but for logic operations (AND, OR, XOR). - NC, - OC, - XC, - - // Use CLC to compare two blocks of memory, with the same comments - // as for MVC. - CLC, - - // Use MVC to set a block of memory after storing the first byte. - MEMSET_MVC, - - // Use an MVST-based sequence to implement stpcpy(). - STPCPY, - - // Use a CLST-based sequence to implement strcmp(). The two input operands - // are the addresses of the strings to compare. - STRCMP, - - // Use an SRST-based sequence to search a block of memory. The first - // operand is the end address, the second is the start, and the third - // is the character to search for. CC is set to 1 on success and 2 - // on failure. - SEARCH_STRING, - - // Store the CC value in bits 29 and 28 of an integer. - IPM, - - // Transaction begin. The first operand is the chain, the second - // the TDB pointer, and the third the immediate control field. - // Returns CC value and chain. - TBEGIN, - TBEGIN_NOFLOAT, - - // Transaction end. Just the chain operand. Returns CC value and chain. - TEND, - - // Create a vector constant by filling byte N of the result with bit - // 15-N of the single operand. - BYTE_MASK, - - // Create a vector constant by replicating an element-sized RISBG-style mask. - // The first operand specifies the starting set bit and the second operand - // specifies the ending set bit. Both operands count from the MSB of the - // element. - ROTATE_MASK, - - // Replicate a GPR scalar value into all elements of a vector. - REPLICATE, - - // Create a vector from two i64 GPRs. - JOIN_DWORDS, - - // Replicate one element of a vector into all elements. The first operand - // is the vector and the second is the index of the element to replicate. - SPLAT, - - // Interleave elements from the high half of operand 0 and the high half - // of operand 1. - MERGE_HIGH, - - // Likewise for the low halves. 
- MERGE_LOW, - - // Concatenate the vectors in the first two operands, shift them left - // by the third operand, and take the first half of the result. - SHL_DOUBLE, - - // Take one element of the first v2i64 operand and the one element of - // the second v2i64 operand and concatenate them to form a v2i64 result. - // The third operand is a 4-bit value of the form 0A0B, where A and B - // are the element selectors for the first operand and second operands - // respectively. - PERMUTE_DWORDS, - - // Perform a general vector permute on vector operands 0 and 1. - // Each byte of operand 2 controls the corresponding byte of the result, - // in the same way as a byte-level VECTOR_SHUFFLE mask. - PERMUTE, - - // Pack vector operands 0 and 1 into a single vector with half-sized elements. - PACK, - - // Likewise, but saturate the result and set CC. PACKS_CC does signed - // saturation and PACKLS_CC does unsigned saturation. - PACKS_CC, - PACKLS_CC, - - // Unpack the first half of vector operand 0 into double-sized elements. - // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. - UNPACK_HIGH, - UNPACKL_HIGH, - - // Likewise for the second half. - UNPACK_LOW, - UNPACKL_LOW, - - // Shift/rotate each element of vector operand 0 by the number of bits - // specified by scalar operand 1. - VSHL_BY_SCALAR, - VSRL_BY_SCALAR, - VSRA_BY_SCALAR, - VROTL_BY_SCALAR, - - // Concatenate the vectors in the first two operands, shift them left/right - // bitwise by the third operand, and take the first/last half of the result. - SHL_DOUBLE_BIT, - SHR_DOUBLE_BIT, - - // For each element of the output type, sum across all sub-elements of - // operand 0 belonging to the corresponding element, and add in the - // rightmost sub-element of the corresponding element of operand 1. - VSUM, - - // Compute carry/borrow indication for add/subtract. - VACC, VSCBI, - // Add/subtract with carry/borrow. - VAC, VSBI, - // Compute carry/borrow indication for add/subtract with carry/borrow. 
- VACCC, VSBCBI, - - // High-word multiply-and-add. - VMAH, VMALH, - // Widen and multiply even/odd vector elements. - VME, VMLE, VMO, VMLO, - - // Compare integer vector operands 0 and 1 to produce the usual 0/-1 - // vector result. VICMPE is for equality, VICMPH for "signed greater than" - // and VICMPHL for "unsigned greater than". - VICMPE, - VICMPH, - VICMPHL, - - // Likewise, but also set the condition codes on the result. - VICMPES, - VICMPHS, - VICMPHLS, - - // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 - // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and - // greater than" and VFCMPHE for "ordered and greater than or equal to". - VFCMPE, - VFCMPH, - VFCMPHE, - - // Likewise, but also set the condition codes on the result. - VFCMPES, - VFCMPHS, - VFCMPHES, - - // Test floating-point data class for vectors. - VFTCI, - - // Extend the even f32 elements of vector operand 0 to produce a vector - // of f64 elements. - VEXTEND, - - // Round the f64 elements of vector operand 0 to f32s and store them in the - // even elements of the result. - VROUND, - - // AND the two vector operands together and set CC based on the result. - VTM, - - // i128 high integer comparisons. - SCMP128HI, - UCMP128HI, - - // String operations that set CC as a side-effect. - VFAE_CC, - VFAEZ_CC, - VFEE_CC, - VFEEZ_CC, - VFENE_CC, - VFENEZ_CC, - VISTR_CC, - VSTRC_CC, - VSTRCZ_CC, - VSTRS_CC, - VSTRSZ_CC, - - // Test Data Class. - // - // Operand 0: the value to test - // Operand 1: the bit mask - TDC, - - // z/OS XPLINK ADA Entry - // Wraps a TargetGlobalAddress that should be loaded from a function's - // AssociatedData Area (ADA). Tha ADA is passed to the function by the - // caller in the XPLink ABI defined register R5. 
- // Operand 0: the GlobalValue/External Symbol - // Operand 1: the ADA register - // Operand 2: the offset (0 for the first and 8 for the second element in the - // function descriptor) - ADA_ENTRY, - - // Strict variants of scalar floating-point comparisons. - // Quiet and signaling versions. - FIRST_STRICTFP_OPCODE, - STRICT_FCMP = FIRST_STRICTFP_OPCODE, - STRICT_FCMPS, - - // Strict variants of vector floating-point comparisons. - // Quiet and signaling versions. - STRICT_VFCMPE, - STRICT_VFCMPH, - STRICT_VFCMPHE, - STRICT_VFCMPES, - STRICT_VFCMPHS, - STRICT_VFCMPHES, - - // Strict variants of VEXTEND and VROUND. - STRICT_VEXTEND, - STRICT_VROUND, - LAST_STRICTFP_OPCODE = STRICT_VROUND, - - // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or - // ATOMIC_LOAD_<op>. - // - // Operand 0: the address of the containing 32-bit-aligned field - // Operand 1: the second operand of <op>, in the high bits of an i32 - // for everything except ATOMIC_SWAPW - // Operand 2: how many bits to rotate the i32 left to bring the first - // operand into the high bits - // Operand 3: the negative of operand 2, for rotating the other way - // Operand 4: the width of the field in bits (8 or 16) - FIRST_MEMORY_OPCODE, - ATOMIC_SWAPW = FIRST_MEMORY_OPCODE, - ATOMIC_LOADW_ADD, - ATOMIC_LOADW_SUB, - ATOMIC_LOADW_AND, - ATOMIC_LOADW_OR, - ATOMIC_LOADW_XOR, - ATOMIC_LOADW_NAND, - ATOMIC_LOADW_MIN, - ATOMIC_LOADW_MAX, - ATOMIC_LOADW_UMIN, - ATOMIC_LOADW_UMAX, - - // A wrapper around the inner loop of an ATOMIC_CMP_SWAP. 
- // - // Operand 0: the address of the containing 32-bit-aligned field - // Operand 1: the compare value, in the low bits of an i32 - // Operand 2: the swap value, in the low bits of an i32 - // Operand 3: how many bits to rotate the i32 left to bring the first - // operand into the high bits - // Operand 4: the negative of operand 2, for rotating the other way - // Operand 5: the width of the field in bits (8 or 16) - ATOMIC_CMP_SWAPW, - - // Atomic compare-and-swap returning CC value. - // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) - ATOMIC_CMP_SWAP, - - // 128-bit atomic load. - // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr) - ATOMIC_LOAD_128, - - // 128-bit atomic store. - // OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr) - ATOMIC_STORE_128, - - // 128-bit atomic compare-and-swap. - // Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) - ATOMIC_CMP_SWAP_128, - - // Byte swapping load/store. Same operands as regular load/store. - LRV, STRV, - - // Element swapping load/store. Same operands as regular load/store. - VLER, VSTER, - - // Use STORE CLOCK FAST to store current TOD clock value. - STCKF, - - // Prefetch from the second operand using the 4-bit control code in - // the first operand. The code is 1 for a load prefetch and 2 for - // a store prefetch. - PREFETCH, - LAST_MEMORY_OPCODE = PREFETCH, -}; - -// Return true if OPCODE is some kind of PC-relative address. -inline bool isPCREL(unsigned Opcode) { - return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; -} -} // end namespace SystemZISD namespace SystemZICMP { // Describes whether an integer comparison needs to be signed or unsigned, @@ -532,8 +148,6 @@ public: return true; } - const char *getTargetNodeName(unsigned Opcode) const override; - // This function currently returns cost for srl/ipm/cc sequence for merging. 
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index 2e21f27..db4f9a1 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -60,7 +60,7 @@ static uint64_t allOnes(unsigned int Count) { void SystemZInstrInfo::anchor() {} SystemZInstrInfo::SystemZInstrInfo(const SystemZSubtarget &sti) - : SystemZGenInstrInfo(sti, -1, -1), + : SystemZGenInstrInfo(sti, RI, -1, -1), RI(sti.getSpecialRegisters()->getReturnFunctionAddressRegister(), sti.getHwMode()), STI(sti) {} @@ -1023,8 +1023,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, void SystemZInstrInfo::storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, - MachineInstr::MIFlag Flags) const { + + Register VReg, MachineInstr::MIFlag Flags) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -1036,10 +1036,12 @@ void SystemZInstrInfo::storeRegToStackSlot( FrameIdx); } -void SystemZInstrInfo::loadRegFromStackSlot( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, - int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, - Register VReg, MachineInstr::MIFlag Flags) const { +void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register DestReg, int FrameIdx, + const TargetRegisterClass *RC, + Register VReg, + MachineInstr::MIFlag Flags) const { DebugLoc DL = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -2358,3 +2360,19 @@ SystemZInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { return std::nullopt; } + +std::pair<unsigned, unsigned> +SystemZInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF, 0u); +} + +ArrayRef<std::pair<unsigned, const char *>> +SystemZInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace SystemZII; + + static const std::pair<unsigned, const char *> TargetFlags[] = { + {MO_ADA_DATA_SYMBOL_ADDR, "systemz-ada-datasymboladdr"}, + {MO_ADA_INDIRECT_FUNC_DESC, "systemz-ada-indirectfuncdesc"}, + {MO_ADA_DIRECT_FUNC_DESC, "systemz-ada-directfuncdesc"}}; + return ArrayRef(TargetFlags); +} diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h index 7b9ad7b..9fadf7b 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -71,18 +71,13 @@ enum { MO_GOT = (1 << 0), // @INDNTPOFF - MO_INDNTPOFF = (2 << 0) -}; + MO_INDNTPOFF = (2 << 0), -// z/OS XPLink specific: classifies the types of -// accesses to the ADA (Associated Data Area). -// These enums contains values that overlap with the above MO_ enums, -// but that's fine since the above enums are used with ELF, -// while these values are used with z/OS. -enum { - MO_ADA_DATA_SYMBOL_ADDR = 1, - MO_ADA_INDIRECT_FUNC_DESC, - MO_ADA_DIRECT_FUNC_DESC, + // z/OS XPLink specific: classifies the types of + // accesses to the ADA (Associated Data Area). + MO_ADA_DATA_SYMBOL_ADDR = (1 << 2), + MO_ADA_INDIRECT_FUNC_DESC = (2 << 2), + MO_ADA_DIRECT_FUNC_DESC = (3 << 2), }; // Classifies a branch. 
@@ -281,12 +276,14 @@ public: void storeRegToStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; void loadRegFromStackSlot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIdx, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, Register VReg, + + Register VReg, MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override; MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override; @@ -389,6 +386,12 @@ public: std::optional<DestSourcePair> isCopyInstrImpl(const MachineInstr &MI) const override; + + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp index 21a233b2..b7a93e7 100644 --- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -216,6 +216,7 @@ static unsigned getInstSizeInBytes(const MachineInstr &MI, MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() || MI.isImplicitDef() || MI.getOpcode() == TargetOpcode::MEMBARRIER || MI.getOpcode() == TargetOpcode::INIT_UNDEF || MI.isFakeUse() || + MI.getOpcode() == TargetOpcode::RELOC_NONE || // These have a size that may be zero: MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP || MI.getOpcode() == SystemZ::PATCHPOINT || diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td index 547d3dc..a02cafa 100644 --- a/llvm/lib/Target/SystemZ/SystemZOperators.td +++ 
b/llvm/lib/Target/SystemZ/SystemZOperators.td @@ -265,74 +265,151 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, SDNPOutGlue]>; def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; -// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. +// Return with a glue operand. Operand 0 is the chain operand. def z_retglue : SDNode<"SystemZISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// Calls a function. Operand 0 is the chain operand and operand 1 +// is the target address. The arguments start at operand 2. +// There is an optional glue operand at the end. def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +// TLS calls. Like regular calls, except operand 1 is the TLS symbol. +// (The call target is implicitly __tls_get_offset.) def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPVariadic]>; def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPVariadic]>; + +// Wraps a TargetGlobalAddress that should be loaded using PC-relative +// accesses (LARL). Operand 0 is the address. def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; + +// Used in cases where an offset is applied to a TargetGlobalAddress. +// Operand 0 is the full TargetGlobalAddress and operand 1 is a +// PCREL_WRAPPER for an anchor point. This is used so that we can +// cheaply refer to either the full address or the anchor point +// as a register base. def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", SDT_ZWrapOffset, []>; + +// Integer comparisons. There are three operands: the two values +// to compare, and an integer of type SystemZICMP. 
def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp>; + +// Floating-point comparisons. The two operands are the values to compare. def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp>; -def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, - [SDNPHasChain]>; -def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, - [SDNPHasChain]>; + +let IsStrictFP = true in { + // Strict variants of scalar floating-point comparisons. + // Quiet and signaling versions. + def z_strict_fcmp : SDNode<"SystemZISD::STRICT_FCMP", SDT_ZCmp, + [SDNPHasChain]>; + def z_strict_fcmps : SDNode<"SystemZISD::STRICT_FCMPS", SDT_ZCmp, + [SDNPHasChain]>; +} + +// Test under mask. The first operand is ANDed with the second operand +// and the condition codes are set on the result. The third operand is +// a boolean that is true if the condition codes need to distinguish +// between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the +// register forms do but the memory forms don't). def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp>; + +// Branches if a condition is true. Operand 0 is the chain operand; +// operand 1 is the 4-bit condition-code mask, with bit N in +// big-endian order meaning "branch if CC=N"; operand 2 is the +// target block and operand 3 is the flag operand. def z_br_ccmask_1 : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask, [SDNPHasChain]>; + +// Selects between operand 0 and operand 1. Operand 2 is the +// mask of condition-code values for which operand 0 should be +// chosen over operand 1; it has the same form as BR_CCMASK. +// Operand 3 is the flag operand. def z_select_ccmask_1 : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask>; + +// Store the CC value in bits 29 and 28 of an integer. def z_ipm_1 : SDNode<"SystemZISD::IPM", SDT_ZIPM>; + +// Evaluates to the gap between the stack pointer and the +// base of the dynamically-allocatable area. 
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; + +// For allocating stack space when using stack clash protector. +// Allocation is performed by block, and each block is probed. def z_probed_alloca : SDNode<"SystemZISD::PROBED_ALLOCA", SDT_ZProbedAlloca, [SDNPHasChain]>; + +// Count number of bits set in operand 0 per byte. def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; + +// Wrappers around the ISD opcodes of the same name. The output is GR128. +// Input operands may be GR64 or GR32, depending on the instruction. def z_smul_lohi : SDNode<"SystemZISD::SMUL_LOHI", SDT_ZGR128Binary>; def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>; def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>; + +// Add/subtract with overflow/carry. These have the same operands as +// the corresponding standard operations, except with the carry flag +// replaced by a condition code value. def z_saddo : SDNode<"SystemZISD::SADDO", SDT_ZBinaryWithFlags>; def z_ssubo : SDNode<"SystemZISD::SSUBO", SDT_ZBinaryWithFlags>; def z_uaddo : SDNode<"SystemZISD::UADDO", SDT_ZBinaryWithFlags>; def z_usubo : SDNode<"SystemZISD::USUBO", SDT_ZBinaryWithFlags>; def z_addcarry_1 : SDNode<"SystemZISD::ADDCARRY", SDT_ZBinaryWithCarry>; def z_subcarry_1 : SDNode<"SystemZISD::SUBCARRY", SDT_ZBinaryWithCarry>; + +// Compute carry/borrow indication for add/subtract. def z_vacc : SDNode<"SystemZISD::VACC", SDTIntBinOp>; -def z_vac : SDNode<"SystemZISD::VAC", SDT_ZTernary>; -def z_vaccc : SDNode<"SystemZISD::VACCC", SDT_ZTernary>; def z_vscbi : SDNode<"SystemZISD::VSCBI", SDTIntBinOp>; + +// Add/subtract with carry/borrow. +def z_vac : SDNode<"SystemZISD::VAC", SDT_ZTernary>; def z_vsbi : SDNode<"SystemZISD::VSBI", SDT_ZTernary>; + +// Compute carry/borrow indication for add/subtract with carry/borrow. 
+def z_vaccc : SDNode<"SystemZISD::VACCC", SDT_ZTernary>; def z_vsbcbi : SDNode<"SystemZISD::VSBCBI", SDT_ZTernary>; + +// High-word multiply-and-add. def z_vmah : SDNode<"SystemZISD::VMAH", SDT_ZTernary>; def z_vmalh : SDNode<"SystemZISD::VMALH", SDT_ZTernary>; + +// Widen and multiply even/odd vector elements. def z_vme : SDNode<"SystemZISD::VME", SDT_ZBinaryConv>; def z_vmle : SDNode<"SystemZISD::VMLE", SDT_ZBinaryConv>; def z_vmo : SDNode<"SystemZISD::VMO", SDT_ZBinaryConv>; def z_vmlo : SDNode<"SystemZISD::VMLO", SDT_ZBinaryConv>; +// Byte swapping load/store. Same operands as regular load/store. def z_loadbswap : SDNode<"SystemZISD::LRV", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// Element swapping load/store. Same operands as regular load/store. def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// Use STORE CLOCK FAST to store current TOD clock value. def z_stckf : SDNode<"SystemZISD::STCKF", SDT_ZStoreInherent, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +// Test Data Class. +// +// Operand 0: the value to test +// Operand 1: the bit mask def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>; def z_eh_sjlj_setjmp : SDNode<"ISD::EH_SJLJ_SETJMP", SDT_ZSetJmp, @@ -346,26 +423,75 @@ def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", SDT_ZInsertVectorElt>; def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDT_ZExtractVectorElt>; + +// Create a vector constant by filling byte N of the result with bit +// 15-N of the single operand. def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; + +// Create a vector constant by replicating an element-sized RISBG-style mask. 
+// The first operand specifies the starting set bit and the second operand +// specifies the ending set bit. Both operands count from the MSB of the +// element. def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; + +// Replicate a GPR scalar value into all elements of a vector. def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; + +// Create a vector from two i64 GPRs. def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; + +// Replicate one element of a vector into all elements. The first operand +// is the vector and the second is the index of the element to replicate. def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>; + +// Interleave elements from the high half of operand 0 and the high half +// of operand 1. def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>; + +// Likewise for the low halves. def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>; + +// Concatenate the vectors in the first two operands, shift them left +// by the third operand, and take the first half of the result. def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>; + +// Concatenate the vectors in the first two operands, shift them left/right +// bitwise by the third operand, and take the first/last half of the result. def z_shl_double_bit : SDNode<"SystemZISD::SHL_DOUBLE_BIT", SDT_ZVecTernaryInt>; def z_shr_double_bit : SDNode<"SystemZISD::SHR_DOUBLE_BIT", SDT_ZVecTernaryInt>; + +// Take one element of the first v2i64 operand and the one element of +// the second v2i64 operand and concatenate them to form a v2i64 result. +// The third operand is a 4-bit value of the form 0A0B, where A and B +// are the element selectors for the first operand and second operands +// respectively. def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", SDT_ZVecTernaryInt>; + +// Perform a general vector permute on vector operands 0 and 1. 
+// Each byte of operand 2 controls the corresponding byte of the result, +// in the same way as a byte-level VECTOR_SHUFFLE mask. def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; + +// Pack vector operands 0 and 1 into a single vector with half-sized elements. def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; + +// Likewise, but saturate the result and set CC. PACKS_CC does signed +// saturation and PACKLS_CC does unsigned saturation. def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConvCC>; def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConvCC>; + +// Unpack the first half of vector operand 0 into double-sized elements. +// UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnpack>; def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnpack>; + +// Likewise for the second half. def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnpack>; def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnpack>; + +// Shift/rotate each element of vector operand 0 by the number of bits +// specified by scalar operand 1. def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR", SDT_ZVecBinaryInt>; def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR", @@ -374,40 +500,75 @@ def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR", SDT_ZVecBinaryInt>; def z_vrotl_by_scalar : SDNode<"SystemZISD::VROTL_BY_SCALAR", SDT_ZVecBinaryInt>; + +// For each element of the output type, sum across all sub-elements of +// operand 0 belonging to the corresponding element, and add in the +// rightmost sub-element of the corresponding element of operand 1. def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZBinaryConv>; + +// Compare integer vector operands 0 and 1 to produce the usual 0/-1 +// vector result. VICMPE is for equality, VICMPH for "signed greater than" +// and VICMPHL for "unsigned greater than". 
def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecCompare>; def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecCompare>; def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecCompare>; + +// Likewise, but also set the condition codes on the result. def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecCompareCC>; def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecCompareCC>; def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecCompareCC>; + +// Compare floating-point vector operands 0 and 1 to produce the usual 0/-1 +// vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and +// greater than" and VFCMPHE for "ordered and greater than or equal to". def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; -def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", - SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; -def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", - SDT_ZVecBinaryConv, [SDNPHasChain]>; def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; -def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", - SDT_ZVecBinaryConv, [SDNPHasChain]>; -def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", - SDT_ZVecBinaryConv, [SDNPHasChain]>; + +// Likewise, but also set the condition codes on the result. def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConvCC>; def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConvCC>; def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConvCC>; + +// Extend the even f32 elements of vector operand 0 to produce a vector +// of f64 elements. 
def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; -def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", - SDT_ZVecUnaryConv, [SDNPHasChain]>; + +// Round the f64 elements of vector operand 0 to f32s and store them in the +// even elements of the result. def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; -def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", + +let IsStrictFP = true in { + // Strict variants of vector floating-point comparisons. + // Quiet and signaling versions. + def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmph : SDNode<"SystemZISD::STRICT_VFCMPH", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmphe : SDNode<"SystemZISD::STRICT_VFCMPHE", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmpes : SDNode<"SystemZISD::STRICT_VFCMPES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmphs : SDNode<"SystemZISD::STRICT_VFCMPHS", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + def z_strict_vfcmphes : SDNode<"SystemZISD::STRICT_VFCMPHES", + SDT_ZVecBinaryConv, [SDNPHasChain]>; + + // Strict variants of VEXTEND and VROUND. + def z_strict_vextend : SDNode<"SystemZISD::STRICT_VEXTEND", + SDT_ZVecUnaryConv, [SDNPHasChain]>; + def z_strict_vround : SDNode<"SystemZISD::STRICT_VROUND", SDT_ZVecUnaryConv, [SDNPHasChain]>; +} + +// AND the two vector operands together and set CC based on the result. def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp>; + +// i128 high integer comparisons. def z_scmp128hi : SDNode<"SystemZISD::SCMP128HI", SDT_ZCmp>; def z_ucmp128hi : SDNode<"SystemZISD::UCMP128HI", SDT_ZCmp>; + +// String operations that set CC as a side-effect. 
def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryIntCC>; def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryIntCC>; def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinaryCC>; @@ -423,12 +584,24 @@ def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC", SDT_ZVecTernaryConvCC>; def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC", SDT_ZVecTernaryConvCC>; + +// Test floating-point data class for vectors. def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>; class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW> : SDNode<"SystemZISD::"#name, profile, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or +// ATOMIC_LOAD_<op>. +// +// Operand 0: the address of the containing 32-bit-aligned field +// Operand 1: the second operand of <op>, in the high bits of an i32 +// for everything except ATOMIC_SWAPW +// Operand 2: how many bits to rotate the i32 left to bring the first +// operand into the high bits +// Operand 3: the negative of operand 2, for rotating the other way +// Operand 4: the width of the field in bits (8 or 16) def z_atomic_swapw : AtomicWOp<"ATOMIC_SWAPW">; def z_atomic_loadw_add : AtomicWOp<"ATOMIC_LOADW_ADD">; def z_atomic_loadw_sub : AtomicWOp<"ATOMIC_LOADW_SUB">; @@ -441,55 +614,117 @@ def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">; def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; +// Atomic compare-and-swap returning CC value. +// Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) def z_atomic_cmp_swap : SDNode<"SystemZISD::ATOMIC_CMP_SWAP", SDT_ZAtomicCmpSwap, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +// A wrapper around the inner loop of an ATOMIC_CMP_SWAP. 
+// +// Operand 0: the address of the containing 32-bit-aligned field +// Operand 1: the compare value, in the low bits of an i32 +// Operand 2: the swap value, in the low bits of an i32 +// Operand 3: how many bits to rotate the i32 left to bring the first +// operand into the high bits +// Operand 4: the negative of operand 3, for rotating the other way +// Operand 5: the width of the field in bits (8 or 16) def z_atomic_cmp_swapw : SDNode<"SystemZISD::ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +// 128-bit atomic load. +// Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr) def z_atomic_load_128 : SDNode<"SystemZISD::ATOMIC_LOAD_128", SDT_ZAtomicLoad128, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// 128-bit atomic store. +// OUTCHAIN = ATOMIC_STORE_128(INCHAIN, val, ptr) def z_atomic_store_128 : SDNode<"SystemZISD::ATOMIC_STORE_128", SDT_ZAtomicStore128, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// 128-bit atomic compare-and-swap. +// Val, CC, OUTCHAIN = ATOMIC_CMP_SWAP_128(INCHAIN, ptr, cmp, swap) def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128", SDT_ZAtomicCmpSwap128, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +// Use a series of MVCs to copy bytes from one memory location to another. +// The operands are: +// - the target address +// - the source address +// - the constant length +// +// This isn't a memory opcode because we'd need to attach two +// MachineMemOperands rather than one. def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; + +// Similar to MVC, but for logic operations (AND, OR, XOR).
def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; + +// Use CLC to compare two blocks of memory, with the same comments +// as for MVC. def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLengthCC, [SDNPHasChain, SDNPMayLoad]>; + +// Use MVC to set a block of memory after storing the first byte. def z_memset_mvc : SDNode<"SystemZISD::MEMSET_MVC", SDT_ZMemsetMVC, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; + +// Use a CLST-based sequence to implement strcmp(). The two input operands +// are the addresses of the strings to compare. def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZStringCC, [SDNPHasChain, SDNPMayLoad]>; + +// Use an MVST-based sequence to implement stpcpy(). def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; + +// Use an SRST-based sequence to search a block of memory. The first +// operand is the end address, the second is the start, and the third +// is the character to search for. CC is set to 1 on success and 2 +// on failure. def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZStringCC, [SDNPHasChain, SDNPMayLoad]>; + +// Prefetch from the second operand using the 4-bit control code in +// the first operand. The code is 1 for a load prefetch and 2 for +// a store prefetch. def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +// Transaction begin. The first operand is the chain, the second +// the TDB pointer, and the third the immediate control field. +// Returns CC value and chain. 
def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, [SDNPHasChain, SDNPMayStore, SDNPSideEffect]>; + +// Transaction end. Just the chain operand. Returns CC value and chain. def z_tend : SDNode<"SystemZISD::TEND", SDT_ZTEnd, [SDNPHasChain, SDNPSideEffect]>; +// z/OS XPLINK ADA Entry +// Wraps a TargetGlobalAddress that should be loaded from a function's +// Associated Data Area (ADA). The ADA is passed to the function by the +// caller in the XPLink ABI defined register R5. +// Operand 0: the GlobalValue/External Symbol +// Operand 1: the ADA register +// Operand 2: the offset (0 for the first and 8 for the second element in the +// function descriptor) def z_ada_entry : SDNode<"SystemZISD::ADA_ENTRY", SDT_ZADAENTRY>; diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index eb00d48..88feba8 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -10,21 +10,27 @@ // //===----------------------------------------------------------------------===// +#include "SystemZSelectionDAGInfo.h" #include "SystemZTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" +#define GET_SDNODE_DESC +#include "SystemZGenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "systemz-selectiondag-info" -bool SystemZSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= SystemZISD::FIRST_MEMORY_OPCODE && - Opcode <= SystemZISD::LAST_MEMORY_OPCODE; -} +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() + : SelectionDAGGenTargetInfo(SystemZGenSDNodeInfo) {} + +const char *SystemZSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { + switch (static_cast<SystemZISD::NodeType>(Opcode)) { + case SystemZISD::GET_CCMASK: + return "SystemZISD::GET_CCMASK"; + } -bool 
SystemZSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= SystemZISD::FIRST_STRICTFP_OPCODE && - Opcode <= SystemZISD::LAST_STRICTFP_OPCODE; + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); } static unsigned getMemMemLenAdj(unsigned Op) { diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 200566f..d25fdda 100644 --- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -15,15 +15,34 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "SystemZGenSDNodeInfo.inc" + namespace llvm { +namespace SystemZISD { + +enum NodeType : unsigned { + // Set the condition code from a boolean value in operand 0. + // Operand 1 is a mask of all condition-code values that may result from this + // operation, operand 2 is a mask of condition-code values that may result + // if the boolean is true. + // Note that this operation is always optimized away; we will never + // generate any code for it. + GET_CCMASK = GENERATED_OPCODE_END, +}; -class SystemZSelectionDAGInfo : public SelectionDAGTargetInfo { -public: - explicit SystemZSelectionDAGInfo() = default; +// Return true if OPCODE is some kind of PC-relative address. 
+inline bool isPCREL(unsigned Opcode) { + return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET; +} - bool isTargetMemoryOpcode(unsigned Opcode) const override; +} // namespace SystemZISD + +class SystemZSelectionDAGInfo : public SelectionDAGGenTargetInfo { +public: + SystemZSelectionDAGInfo(); - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h index 9d0adbb..87ec256 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetObjectFile.h @@ -16,7 +16,7 @@ namespace llvm { /// This implementation is used for SystemZ ELF targets. class SystemZELFTargetObjectFile : public TargetLoweringObjectFileELF { public: - SystemZELFTargetObjectFile() {} + SystemZELFTargetObjectFile() = default; /// Describe a TLS variable address within debug info. const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override; |
