Diffstat (limited to 'llvm/lib')
31 files changed, 304 insertions, 54 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 4f6a19f..d656f10 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3482,7 +3482,7 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
 bool IRTranslator::translateAtomicRMW(const User &U,
                                       MachineIRBuilder &MIRBuilder) {
-  if (containsBF16Type(U))
+  if (!MF->getTarget().getTargetTriple().isSPIRV() && containsBF16Type(U))
     return false;
 
   const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbc1d73..80bbfea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -76,7 +76,6 @@
 #include <cstdlib>
 #include <limits>
 #include <optional>
-#include <set>
 #include <string>
 #include <utility>
 #include <vector>
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
index 91b3dd5..c82edd9 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
@@ -15,8 +15,6 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/TimeProfiler.h"
 
-#include <map>
-
 using namespace llvm;
 using namespace llvm::msf;
 using namespace llvm::support;
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 0aefe6e..769dbd4 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Demangle/StringViewExtras.h"
 #include "llvm/Demangle/Utility.h"
 
-#include <array>
 #include <cctype>
 #include <cstdio>
 #include <optional>
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
index 55442e0..50ba2f8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
@@ -23,8 +23,6 @@
 
 #define DEBUG_TYPE "jitlink"
 
-#include <list>
-
 namespace llvm {
 namespace jitlink {
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index 343218e..91021e4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -21,8 +21,6 @@
 #include "EHFrameSupportImpl.h"
 #include "JITLinkGeneric.h"
 
-#include <list>
-
 namespace llvm {
 namespace jitlink {
diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp
index 1a81d18..9ae4c98 100644
--- a/llvm/lib/SandboxIR/Instruction.cpp
+++ b/llvm/lib/SandboxIR/Instruction.cpp
@@ -1125,6 +1125,33 @@ void SwitchInst::setDefaultDest(BasicBlock *DefaultCase) {
   cast<llvm::SwitchInst>(Val)->setDefaultDest(
       cast<llvm::BasicBlock>(DefaultCase->Val));
 }
+
+template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+ConstT *
+SwitchInst::CaseHandleImpl<LLVMCaseItT, BlockT, ConstT>::getCaseValue() const {
+  const auto &LLVMCaseHandle = *LLVMCaseIt;
+  auto *LLVMC = Ctx.getValue(LLVMCaseHandle.getCaseValue());
+  return cast<ConstT>(LLVMC);
+}
+
+template <typename LLVMCaseItT, typename BlockT, typename ConstT>
+BlockT *
+SwitchInst::CaseHandleImpl<LLVMCaseItT, BlockT, ConstT>::getCaseSuccessor()
+    const {
+  const auto &LLVMCaseHandle = *LLVMCaseIt;
+  auto *LLVMBB = LLVMCaseHandle.getCaseSuccessor();
+  return cast<BlockT>(Ctx.getValue(LLVMBB));
+}
+
+template class SwitchInst::CaseHandleImpl<llvm::SwitchInst::CaseIt, BasicBlock,
+                                          ConstantInt>;
+template class SwitchInst::CaseItImpl<llvm::SwitchInst::CaseIt, BasicBlock,
+                                      ConstantInt>;
+template class SwitchInst::CaseHandleImpl<llvm::SwitchInst::ConstCaseIt,
+                                          const BasicBlock, const ConstantInt>;
+template class SwitchInst::CaseItImpl<llvm::SwitchInst::ConstCaseIt,
+                                      const BasicBlock, const ConstantInt>;
+
 ConstantInt *SwitchInst::findCaseDest(BasicBlock *BB) {
   auto *LLVMC = cast<llvm::SwitchInst>(Val)->findCaseDest(
       cast<llvm::BasicBlock>(BB->Val));
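Note: the explicit instantiations above back case iteration through the SandboxIR wrapper. A minimal usage sketch follows; it assumes sandboxir::SwitchInst exposes a cases() range built on the CaseItImpl iterators, mirroring llvm::SwitchInst (the helper and its name are ours, not from this patch):

#include "llvm/SandboxIR/Instruction.h"

// Hedged sketch: count the non-default cases that branch to BB. Both
// accessors translate the wrapped llvm:: objects back into SandboxIR
// counterparts via Ctx.getValue(), as defined above.
static unsigned countCaseTargets(llvm::sandboxir::SwitchInst *SI,
                                 llvm::sandboxir::BasicBlock *BB) {
  unsigned N = 0;
  for (const auto &Case : SI->cases()) // assumed cases() range
    if (Case.getCaseSuccessor() == BB && Case.getCaseValue() != nullptr)
      ++N;
  return N;
}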
diff --git a/llvm/lib/Support/DeltaAlgorithm.cpp b/llvm/lib/Support/DeltaAlgorithm.cpp
index d763cde..e91ee91 100644
--- a/llvm/lib/Support/DeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -8,7 +8,6 @@
 #include "llvm/ADT/DeltaAlgorithm.h"
 #include <algorithm>
 #include <iterator>
-#include <set>
 using namespace llvm;
 
 DeltaAlgorithm::~DeltaAlgorithm() = default;
diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp
index 3bff4e1..32e2a2e 100644
--- a/llvm/lib/Support/MD5.cpp
+++ b/llvm/lib/Support/MD5.cpp
@@ -43,7 +43,6 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Endian.h"
-#include <array>
 #include <cstdint>
 #include <cstring>
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8a038f..76a790dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26050,7 +26050,7 @@ static SDValue performCSELCombine(SDNode *N,
   // CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
   // CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
   if (SDValue Folded = foldCSELofCTTZ(N, DAG))
-		return Folded;
+    return Folded;
 
   // CSEL a, b, cc, SUBS(x, y) -> CSEL a, b, swapped(cc), SUBS(y, x)
   // if SUB(y, x) already exists and we can produce a swapped predicate for cc.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 14b0f9a..3940246 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5666,6 +5666,9 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                                MachineRegisterInfo &MRI) {
   LLT DstTy = MRI.getType(Dst);
   unsigned DstSize = DstTy.getSizeInBits();
+  assert((DstSize == 64 || DstSize == 128) &&
+         "Unexpected vector constant size");
+
   if (CV->isNullValue()) {
     if (DstSize == 128) {
       auto Mov =
@@ -5735,17 +5738,24 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
     // Try to create the new constants with MOVI, and if so generate a fneg
     // for it.
     if (auto *NewOp = TryMOVIWithBits(NegBits)) {
-      Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
+      Register NewDst = MRI.createVirtualRegister(
+          DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
       NewOp->getOperand(0).setReg(NewDst);
       return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
     }
     return nullptr;
   };
   MachineInstr *R;
-  if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
-      (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
+  if ((R = TryWithFNeg(DefBits, 32,
+                       DstSize == 64 ? AArch64::FNEGv2f32
+                                     : AArch64::FNEGv4f32)) ||
+      (R = TryWithFNeg(DefBits, 64,
+                       DstSize == 64 ? AArch64::FNEGDr
+                                     : AArch64::FNEGv2f64)) ||
       (STI.hasFullFP16() &&
-       (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
+       (R = TryWithFNeg(DefBits, 16,
+                        DstSize == 64 ? AArch64::FNEGv4f16
+                                      : AArch64::FNEGv8f16))))
     return R;
 }
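Note: the MOVI+FNEG trick above keys on the fact that vector FNEG flips only each lane's sign bit, and the change extends it to 64-bit vectors by picking the D-register opcodes. A scalar model of one 32-bit lane (values ours, for illustration): a splat of -0.0f (0x80000000 per lane) is not directly MOVI-encodable, but its sign-flipped form is all-zeros, so it can be emitted as MOVI #0 followed by a vector FNEG.

#include <cstdint>

// One 32-bit lane under vector FNEG: only bit 31 changes.
uint32_t fnegLane(uint32_t Lane) { return Lane ^ 0x80000000u; }
// fnegLane(0x00000000) == 0x80000000, i.e. +0.0f -> -0.0f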
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e67db8e..b119146 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1402,7 +1402,7 @@ static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
     Inst.addOperand(MCOperand::createImm(U | (imm << 4) | Rm));
   } else {
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
-    return MCDisassembler::Fail;
+      return MCDisassembler::Fail;
     Inst.addOperand(MCOperand::createImm(U));
   }
 
@@ -1922,7 +1922,7 @@ static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
   imm |= fieldFromInstruction(Insn, 24, 1) << 1;
   if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
                                 true, 4, Inst, Decoder))
-  Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
+    Inst.addOperand(MCOperand::createImm(SignExtend32<26>(imm)));
 
   return S;
 }
@@ -3703,17 +3703,17 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
     Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;
 
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
-    return MCDisassembler::Fail;
+      return MCDisassembler::Fail;
 
     Inst.addOperand(MCOperand::createReg(ARM::SP));
 
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
-    return MCDisassembler::Fail;
+      return MCDisassembler::Fail;
   } else if (Inst.getOpcode() == ARM::tADDspr) {
     unsigned Rm = fieldFromInstruction(Insn, 3, 4);
 
     Inst.addOperand(MCOperand::createReg(ARM::SP));
     Inst.addOperand(MCOperand::createReg(ARM::SP));
 
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
-    return MCDisassembler::Fail;
+      return MCDisassembler::Fail;
   }
 
   return S;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
index 8707b08..f2c00c7 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
@@ -18,7 +18,6 @@
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/MemoryBufferRef.h"
-#include <map>
 #include <memory>
 #include <string>
 #include <vector>
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 526b4de..04a9760 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3948,3 +3948,13 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
   return AtomicExpansionKind::LLSC;
 }
+
+bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  // Only sink 'and' mask to cmp use block if it is masking a single bit since
+  // this will fold the and/cmp/br into a single tstbit instruction.
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+  if (!Mask)
+    return false;
+  return Mask->getValue().isPowerOf2();
+}
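Note: for context, a sketch of the source pattern this hook keeps together (function and values ours, for illustration). Keeping a single-bit 'and' in the same block as its compare lets the Hexagon backend fold and+cmp+br into one tstbit-based predicated branch:

// (x & single-bit-mask) == 0 followed by a branch on the result is the
// shape that can lower to p0 = !tstbit(r0, #6); if (p0) jump ...
bool isBitSixClear(unsigned X) {
  return (X & (1u << 6)) == 0; // power-of-two mask: isPowerOf2() above
}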
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8d04edb..4ac3e76 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -160,6 +160,8 @@ public:
 
   bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
 
+  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
   /// Return true if an FMA operation is faster than a pair of mul and add
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this
   /// method returns true (and FMAs are legal), otherwise fmuladd is
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 6dd83c1..2ee3b9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -198,7 +198,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
     // Reaching Def to an offset register can't be a phi.
     if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
         MI.getParent() != UseMI.getParent())
-    return false;
+      return false;
 
     const MCInstrDesc &UseMID = UseMI.getDesc();
     if ((!UseMID.mayLoad() && !UseMID.mayStore()) ||
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index ce2de75..a3c8a88 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -28,7 +28,6 @@
 #include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 #include <cassert>
-#include <map>
 #include <optional>
 
 using namespace llvm;
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index f548a8d..5107c8d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -111,4 +111,25 @@ bool LoongArchTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
   }
 }
 
-// TODO: Implement more hooks to provide TTI machinery for LoongArch.
+LoongArchTTIImpl::TTI::MemCmpExpansionOptions
+LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+
+  if (!ST->hasUAL())
+    return Options;
+
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  Options.AllowOverlappingLoads = true;
+
+  // TODO: Support for vectors.
+  if (ST->is64Bit()) {
+    Options.LoadSizes = {8, 4, 2, 1};
+    Options.AllowedTailExpansions = {3, 5, 6};
+  } else {
+    Options.LoadSizes = {4, 2, 1};
+    Options.AllowedTailExpansions = {3};
+  }
+
+  return Options;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index e3f16c7..9b479f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -55,7 +55,8 @@ public:
 
   bool shouldExpandReduction(const IntrinsicInst *II) const override;
 
-  // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+  TTI::MemCmpExpansionOptions
+  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
 };
 
 } // end namespace llvm
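Note: with these options enabled (unaligned access gated on hasUAL, overlapping loads allowed), small fixed-size memcmp calls can be expanded inline instead of going through a libcall. A hedged sketch of the kind of call site that benefits (function ours):

#include <cstring>

// On LA64, an equality compare of 15 bytes can become one 8-byte load pair
// plus a second, overlapping pair covering bytes 7..14 - no memcmp libcall.
bool tagsEqual(const char *A, const char *B) {
  return std::memcmp(A, B, 15) == 0; // IsZeroCmp expansion path
}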
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index 5d08f56..aa94f54 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -405,9 +405,9 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
     }
     if (SecondRegSaved)
       copyPhysReg(MBB, II, DL, SecondRegSavedTo, SecondRegSaved, true);
+  } else {
+    Available.reset(SpReg);
   }
-  else
-    Available.reset(SpReg);
   copyPhysReg(MBB, II, DL, SpReg, Mips::SP, false);
   BuildMI(MBB, II, DL, get(Mips::AdduRxRyRz16), Reg)
       .addReg(SpReg, RegState::Kill)
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index d92ae8d..21d7768 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -25,7 +25,6 @@
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/FormatVariadic.h"
 #include <cstdarg>
-#include <set>
 #include <string>
 
 namespace llvm {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1977d33..a3ccbd8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,11 @@ static cl::opt<bool>
                  "be combined with a shift"),
         cl::init(true));
 
+// TODO: Support more ops
+static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS,
+                                       ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -1208,6 +1213,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   };
 
+  // Sets common actions for zvfbfa; some instructions are supported
+  // natively, so we don't need to promote them.
+  const auto SetZvfbfaActions = [&](MVT VT) {
+    setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+    setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+                       Custom);
+    setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+    setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+    setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
+    setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+                       Custom);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+    setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
+    setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+                        ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+                        ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+                        ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+                        ISD::VECTOR_COMPRESS},
+                       VT, Custom);
+    setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+    setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
+    setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+    setOperationAction(ZvfbfaVPOps, VT, Custom);
+
+    MVT EltVT = VT.getVectorElementType();
+    if (isTypeLegal(EltVT))
+      setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+                          ISD::EXTRACT_VECTOR_ELT},
+                         VT, Custom);
+    else
+      setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+                         EltVT, Custom);
+    setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+                        ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+                        ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+                        ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+                        ISD::VP_SCATTER},
+                       VT, Custom);
+    setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
+
+    // Expand FP operations that need libcalls.
+    setOperationAction(FloatingPointLibCallOps, VT, Expand);
+
+    // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
+    if (getLMUL(VT) == RISCVVType::LMUL_8) {
+      setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+      setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+    } else {
+      MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+      setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+      setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+    }
+  };
+
   if (Subtarget.hasVInstructionsF16()) {
     for (MVT VT : F16VecVTs) {
       if (!isTypeLegal(VT))
@@ -1222,7 +1282,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget.hasVInstructionsBF16Minimal()) {
+  if (Subtarget.hasVInstructionsBF16()) {
+    for (MVT VT : BF16VecVTs) {
+      if (!isTypeLegal(VT))
+        continue;
+      SetZvfbfaActions(VT);
+    }
+  } else if (Subtarget.hasVInstructionsBF16Minimal()) {
     for (MVT VT : BF16VecVTs) {
       if (!isTypeLegal(VT))
         continue;
@@ -1501,6 +1567,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         // available.
         setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
       }
+      if (Subtarget.hasStdExtZvfbfa()) {
+        setOperationAction(ZvfbfaOps, VT, Custom);
+        setOperationAction(ZvfbfaVPOps, VT, Custom);
+      }
       setOperationAction(
           {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
           Custom);
@@ -7245,7 +7315,11 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
   return (Op.getValueType() == MVT::nxv32f16 &&
           (Subtarget.hasVInstructionsF16Minimal() &&
           !Subtarget.hasVInstructionsF16())) ||
-         Op.getValueType() == MVT::nxv32bf16;
+         (Op.getValueType() == MVT::nxv32bf16 &&
+          Subtarget.hasVInstructionsBF16Minimal() &&
+          (!Subtarget.hasVInstructionsBF16() ||
+           (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
+            !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
 }
 
 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index b9c5b75..ffb2ac0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in {
                    FRM_DYN, fvti.AVL, fvti.Log2SEW, TA_MA)>;
   }
-}
+
+  foreach vti = AllBF16Vectors in {
+    // 13.12. Vector Floating-Point Sign-Injection Instructions
+    def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+              (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW,
+                  TA_MA)>;
+    // Handle fneg with VFSGNJN using the same input for both operands.
+    def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW,
+                  TA_MA)>;
+
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector vti.RegClass:$rs2))),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW,
+                  TA_MA)>;
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL,
+                  vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (fneg vti.RegClass:$rs2)))),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW,
+                  TA_MA)>;
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)),
+                  vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL,
+                  vti.Log2SEW, TA_MA)>;
+
+    // 13.12. Vector Floating-Point Sign-Injection Instructions
+    def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs),
+                             (vti.Mask VMV0:$vm), VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                  (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+                  vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl,
+                  vti.Log2SEW, TA_MA)>;
+    // Handle fneg with VFSGNJN using the same input for both operands.
+    def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs),
+                             (vti.Mask VMV0:$vm), VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                  (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+                  vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl,
+                  vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (vti.Vector vti.RegClass:$rs2),
+                                  vti.RegClass:$passthru,
+                                  (vti.Mask VMV0:$vm),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                  vti.RegClass:$passthru, vti.RegClass:$rs1,
+                  vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl,
+                  vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (riscv_fneg_vl vti.RegClass:$rs2,
+                                                 (vti.Mask true_mask),
+                                                 VLOpFrag),
+                                  srcvalue,
+                                  (vti.Mask true_mask),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"#vti.LMul.MX#"_E"#vti.SEW)
+                  (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                  vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (SplatFPOp vti.ScalarRegClass:$rs2),
+                                  vti.RegClass:$passthru,
+                                  (vti.Mask VMV0:$vm),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                  vti.RegClass:$passthru, vti.RegClass:$rs1,
+                  vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl,
+                  vti.Log2SEW, TAIL_AGNOSTIC)>;
+  }
+}
 } // Predicates = [HasStdExtZvfbfa]
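Note: these patterns can lower fneg/fabs/fcopysign to the sign-injection pseudos (VFSGNJN/VFSGNJX/VFSGNJ) because those operations never inspect the bf16 payload; they only move sign bits, which is why Zvfbfa keeps them native instead of promoting to f32 like other bf16 arithmetic. A scalar C++ model of the bit manipulation (ours, for illustration):

#include <cstdint>

// bf16 bit layout: [15] sign, [14:7] exponent, [6:0] mantissa.
uint16_t bf16Fneg(uint16_t X) { return X ^ 0x8000u; } // flip sign
uint16_t bf16Fabs(uint16_t X) { return X & 0x7FFFu; } // clear sign
uint16_t bf16Copysign(uint16_t Mag, uint16_t Sgn) {
  return uint16_t((Mag & 0x7FFFu) | (Sgn & 0x8000u)); // splice signs
}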
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index f681b0d..ac09b93 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -29,6 +29,8 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
          SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float16_add},
         {"SPV_EXT_shader_atomic_float_min_max",
          SPIRV::Extension::Extension::SPV_EXT_shader_atomic_float_min_max},
+        {"SPV_INTEL_16bit_atomics",
+         SPIRV::Extension::Extension::SPV_INTEL_16bit_atomics},
         {"SPV_EXT_arithmetic_fence",
          SPIRV::Extension::Extension::SPV_EXT_arithmetic_fence},
         {"SPV_EXT_demote_to_helper_invocation",
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index af76016..fbb127d 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1058,6 +1058,13 @@ static void addOpTypeImageReqs(const MachineInstr &MI,
   }
 }
 
+static bool isBFloat16Type(const SPIRVType *TypeDef) {
+  return TypeDef && TypeDef->getNumOperands() == 3 &&
+         TypeDef->getOpcode() == SPIRV::OpTypeFloat &&
+         TypeDef->getOperand(1).getImm() == 16 &&
+         TypeDef->getOperand(2).getImm() == SPIRV::FPEncoding::BFloat16KHR;
+}
+
 // Add requirements for handling atomic float instructions
 #define ATOM_FLT_REQ_EXT_MSG(ExtName)                                         \
   "The atomic float instruction requires the following SPIR-V "               \
@@ -1081,11 +1088,21 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI,
     Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float_add);
     switch (BitWidth) {
     case 16:
-      if (!ST.canUseExtension(
-              SPIRV::Extension::SPV_EXT_shader_atomic_float16_add))
-        report_fatal_error(ATOM_FLT_REQ_EXT_MSG("16_add"), false);
-      Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float16_add);
-      Reqs.addCapability(SPIRV::Capability::AtomicFloat16AddEXT);
+      if (isBFloat16Type(TypeDef)) {
+        if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics))
+          report_fatal_error(
+              "The atomic bfloat16 instruction requires the following SPIR-V "
+              "extension: SPV_INTEL_16bit_atomics",
+              false);
+        Reqs.addExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics);
+        Reqs.addCapability(SPIRV::Capability::AtomicBFloat16AddINTEL);
+      } else {
+        if (!ST.canUseExtension(
+                SPIRV::Extension::SPV_EXT_shader_atomic_float16_add))
+          report_fatal_error(ATOM_FLT_REQ_EXT_MSG("16_add"), false);
+        Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float16_add);
+        Reqs.addCapability(SPIRV::Capability::AtomicFloat16AddEXT);
+      }
       break;
     case 32:
       Reqs.addCapability(SPIRV::Capability::AtomicFloat32AddEXT);
@@ -1104,7 +1121,17 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI,
     Reqs.addExtension(SPIRV::Extension::SPV_EXT_shader_atomic_float_min_max);
     switch (BitWidth) {
     case 16:
-      Reqs.addCapability(SPIRV::Capability::AtomicFloat16MinMaxEXT);
+      if (isBFloat16Type(TypeDef)) {
+        if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics))
+          report_fatal_error(
+              "The atomic bfloat16 instruction requires the following SPIR-V "
+              "extension: SPV_INTEL_16bit_atomics",
+              false);
+        Reqs.addExtension(SPIRV::Extension::SPV_INTEL_16bit_atomics);
+        Reqs.addCapability(SPIRV::Capability::AtomicBFloat16MinMaxINTEL);
+      } else {
+        Reqs.addCapability(SPIRV::Capability::AtomicFloat16MinMaxEXT);
+      }
       break;
     case 32:
       Reqs.addCapability(SPIRV::Capability::AtomicFloat32MinMaxEXT);
@@ -1328,13 +1355,6 @@ void addPrintfRequirements(const MachineInstr &MI,
   }
 }
 
-static bool isBFloat16Type(const SPIRVType *TypeDef) {
-  return TypeDef && TypeDef->getNumOperands() == 3 &&
-         TypeDef->getOpcode() == SPIRV::OpTypeFloat &&
-         TypeDef->getOperand(1).getImm() == 16 &&
-         TypeDef->getOperand(2).getImm() == SPIRV::FPEncoding::BFloat16KHR;
-}
-
 void addInstrRequirements(const MachineInstr &MI,
                           SPIRV::ModuleAnalysisInfo &MAI,
                           const SPIRVSubtarget &ST) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 65a8885..f02a587 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -389,6 +389,7 @@ defm SPV_INTEL_predicated_io : ExtensionOperand<127, [EnvOpenCL]>;
 defm SPV_KHR_maximal_reconvergence : ExtensionOperand<128, [EnvVulkan]>;
 defm SPV_INTEL_bfloat16_arithmetic
     : ExtensionOperand<129, [EnvVulkan, EnvOpenCL]>;
+defm SPV_INTEL_16bit_atomics : ExtensionOperand<130, [EnvVulkan, EnvOpenCL]>;
 
 //===----------------------------------------------------------------------===//
 // Multiclass used to define Capabilities enum values and at the same time
@@ -566,9 +567,11 @@ defm FloatControls2
 defm AtomicFloat32AddEXT : CapabilityOperand<6033, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
 defm AtomicFloat64AddEXT : CapabilityOperand<6034, 0, 0, [SPV_EXT_shader_atomic_float_add], []>;
 defm AtomicFloat16AddEXT : CapabilityOperand<6095, 0, 0, [SPV_EXT_shader_atomic_float16_add], []>;
+defm AtomicBFloat16AddINTEL : CapabilityOperand<6255, 0, 0, [SPV_INTEL_16bit_atomics], []>;
 defm AtomicFloat16MinMaxEXT : CapabilityOperand<5616, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
 defm AtomicFloat32MinMaxEXT : CapabilityOperand<5612, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
 defm AtomicFloat64MinMaxEXT : CapabilityOperand<5613, 0, 0, [SPV_EXT_shader_atomic_float_min_max], []>;
+defm AtomicBFloat16MinMaxINTEL : CapabilityOperand<6256, 0, 0, [SPV_INTEL_16bit_atomics], []>;
 defm VariableLengthArrayINTEL : CapabilityOperand<5817, 0, 0, [SPV_INTEL_variable_length_array], []>;
 defm GroupUniformArithmeticKHR :
     CapabilityOperand<6400, 0, 0, [SPV_KHR_uniform_group_instructions], []>;
 defm USMStorageClassesINTEL : CapabilityOperand<5935, 0, 0, [SPV_INTEL_usm_storage_classes], [Kernel]>;
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index f08a0c0..94ae64c 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -14,7 +14,6 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include <array>
 #include <atomic>
 #include <optional>
 #include <string>
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bd74388..8e76b79 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -83,7 +83,6 @@
 #include <cstdint>
 #include <functional>
 #include <limits>
-#include <map>
 #include <memory>
 #include <queue>
 #include <string>
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 27ec6c6..5bc9c28 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -5627,8 +5627,15 @@ bool InstCombinerImpl::run() {
 
     for (Use &U : I->uses()) {
       User *User = U.getUser();
-      if (User->isDroppable())
-        continue;
+      if (User->isDroppable()) {
+        // Do not sink if there are dereferenceable assumes that would be
+        // removed. Droppable users are always assume-like intrinsic calls.
+        auto *II = cast<IntrinsicInst>(User);
+        if (II->getIntrinsicID() != Intrinsic::assume ||
+            !II->getOperandBundle("dereferenceable"))
+          continue;
+      }
+
       if (NumUsers > MaxSinkNumUsers)
         return std::nullopt;
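Note: for reference, a minimal standalone sketch of the check the sink now performs (helper name ours, not from the patch). An assume is only free to drop here when it carries no "dereferenceable" operand bundle:

#include "llvm/IR/IntrinsicInst.h"

// True iff U is an llvm.assume call carrying a "dereferenceable" operand
// bundle, i.e. an assume that sinking would otherwise have to discard.
static bool isDereferenceableAssume(const llvm::User *U) {
  const auto *II = llvm::dyn_cast<llvm::IntrinsicInst>(U);
  return II && II->getIntrinsicID() == llvm::Intrinsic::assume &&
         II->getOperandBundle("dereferenceable").has_value();
}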
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 666033b..45b5570 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8157,9 +8157,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
     return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands());
 
   if (Instruction::isCast(VPI->getOpcode())) {
+    auto *CastR = cast<VPInstructionWithType>(R);
     auto *CI = cast<CastInst>(Instr);
     return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0),
-                                 CI->getType(), *CI);
+                                 CastR->getResultType(), *CI);
   }
 
   return tryToWiden(VPI);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f405c40..663e31a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/MDBuilder.h"
 
 #define DEBUG_TYPE "vplan"
@@ -233,10 +234,15 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       for (Value *Op : Inst->operands())
        VPOperands.push_back(getOrCreateVPOperand(Op));
 
-      // Build VPInstruction for any arbitrary Instruction without specific
-      // representation in VPlan.
-      NewR = cast<VPInstruction>(
-          VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+      if (auto *CI = dyn_cast<CastInst>(Inst)) {
+        NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
+                                            CI->getType(), CI->getDebugLoc());
+        NewR->setUnderlyingValue(CI);
+      } else {
+        // Build VPInstruction for any arbitrary Instruction without specific
+        // representation in VPlan.
+        NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst);
+      }
     }
 
     IRDef2VPValue[Inst] = NewR;
