diff options
Diffstat (limited to 'llvm/include')
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 22 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 6 | ||||
-rw-r--r-- | llvm/include/llvm/CodeGen/BasicTTIImpl.h | 24 | ||||
-rw-r--r-- | llvm/include/llvm/CodeGen/MachineCopyPropagation.h | 35 | ||||
-rw-r--r-- | llvm/include/llvm/CodeGen/ReachingDefAnalysis.h | 10 | ||||
-rw-r--r-- | llvm/include/llvm/CodeGen/TargetLowering.h | 12 | ||||
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsNVVM.td | 30 | ||||
-rw-r--r-- | llvm/include/llvm/InitializePasses.h | 2 | ||||
-rw-r--r-- | llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 | ||||
-rw-r--r-- | llvm/include/llvm/Passes/MachinePassRegistry.def | 2 | ||||
-rw-r--r-- | llvm/include/llvm/Support/NVPTXAddrSpace.h | 26 |
11 files changed, 162 insertions, 8 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index ee93aba..08ab4ee 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1517,6 +1517,19 @@ public: Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) const; + /// \return The cost of Expand Load or Compress Store operation + /// \p Opcode - is a type of memory access Load or Store + /// \p Src - a vector type of the data to be loaded or stored + /// \p VariableMask - true when the memory access is predicated with a mask + /// that is not a compile-time constant + /// \p Alignment - alignment of single element + /// \p I - the optional original context instruction, if one exists, e.g. the + /// load/store to transform or the call to the gather/scatter intrinsic + InstructionCost getExpandCompressMemoryOpCost( + unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; + /// \return The cost of strided memory operations. /// \p Opcode - is a type of memory access Load or Store /// \p DataTy - a vector type of the data to be loaded or stored @@ -2228,6 +2241,9 @@ public: bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0; + virtual InstructionCost getExpandCompressMemoryOpCost( + unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0; virtual InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, @@ -2963,6 +2979,12 @@ public: return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I); } + InstructionCost getExpandCompressMemoryOpCost( + unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr) override { + return Impl.getExpandCompressMemoryOpCost(Opcode, DataTy, VariableMask, + Alignment, CostKind, I); + } InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index b51663a..5128c6b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -774,6 +774,12 @@ public: return 1; } + InstructionCost getExpandCompressMemoryOpCost( + unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const { + return 1; + } + InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 9571bd9..a76de25 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1468,6 +1468,15 @@ public: true, CostKind); } + InstructionCost getExpandCompressMemoryOpCost( + unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { + // Treat expand load/compress store as gather/scatter operation. + // TODO: implement more precise cost estimation for these intrinsics. + return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask, + /*IsGatherScatter*/ true, CostKind); + } + InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, @@ -1776,6 +1785,21 @@ public: return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); } + case Intrinsic::masked_compressstore: { + const Value *Data = Args[0]; + const Value *Mask = Args[2]; + Align Alignment = I->getParamAlign(1).valueOrOne(); + return thisT()->getExpandCompressMemoryOpCost( + Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment, + CostKind, I); + } + case Intrinsic::masked_expandload: { + const Value *Mask = Args[1]; + Align Alignment = I->getParamAlign(0).valueOrOne(); + return thisT()->getExpandCompressMemoryOpCost(Instruction::Load, RetTy, + !isa<Constant>(Mask), + Alignment, CostKind, I); + } case Intrinsic::experimental_vp_strided_store: { const Value *Data = Args[0]; const Value *Ptr = Args[1]; diff --git a/llvm/include/llvm/CodeGen/MachineCopyPropagation.h b/llvm/include/llvm/CodeGen/MachineCopyPropagation.h new file mode 100644 index 0000000..2fe2646 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachineCopyPropagation.h @@ -0,0 +1,35 @@ +//===- llvm/CodeGen/MachineCopyPropagation.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINECOPYPROPAGATION_H +#define LLVM_CODEGEN_MACHINECOPYPROPAGATION_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class MachineCopyPropagationPass + : public PassInfoMixin<MachineCopyPropagationPass> { + bool UseCopyInstr; + +public: + MachineCopyPropagationPass(bool UseCopyInstr = false) + : UseCopyInstr(UseCopyInstr) {} + + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + + MachineFunctionProperties getRequiredProperties() const { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_MACHINECOPYPROPAGATION_H diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h index cff422f..978e84b 100644 --- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h +++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -141,12 +141,12 @@ private: DenseMap<MachineInstr *, int> InstIds; MBBReachingDefsInfo MBBReachingDefs; + + /// MBBFrameObjsReachingDefs[{i, j}] is a list of instruction indices + /// (relative to begining of MBB i) that define frame index j in MBB i. This + /// is used in answering reaching definition queries. using MBBFrameObjsReachingDefsInfo = - DenseMap<unsigned, DenseMap<int, SmallVector<int>>>; - // MBBFrameObjsReachingDefs[i][j] is a list of instruction indices (relative - // to begining of MBB) that define frame index (j + - // MF->getFrameInfo().getObjectIndexBegin()) in MBB i. This is used in - // answering reaching definition queries. + DenseMap<std::pair<unsigned, int>, SmallVector<int>>; MBBFrameObjsReachingDefsInfo MBBFrameObjsReachingDefs; /// Default values are 'nothing happened a long time ago'. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 9fcd2ac..04ee24c 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -5622,6 +5622,18 @@ public: // joining their results. SDValue() is returned when expansion did not happen. SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const; + /// Replace an extraction of a load with a narrowed load. + /// + /// \param ResultVT type of the result extraction. + /// \param InVecVT type of the input vector to with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad vector load that to be replaced. + /// \returns \p ResultVT Load on success SDValue() on failure. + SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL, + EVT InVecVT, SDValue EltNo, + LoadSDNode *OriginalLoad, + SelectionDAG &DAG) const; + private: SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const; diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 9a2f38d..abbe25b 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -48,6 +48,7 @@ def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr +def llvm_tmem_ptr_ty : LLVMQualPointerType<6>; // (tensor memory)ptr // // MISC @@ -5055,4 +5056,33 @@ def int_nvvm_cp_async_bulk_prefetch_L2 def int_nvvm_griddepcontrol_launch_dependents: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_nvvm_griddepcontrol_wait: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; +// +// Tcgen05 family of Intrinsics +// + +// Tcgen05 alloc/dealloc related intrinsics + +foreach cta_group = ["cg1", "cg2"] in { + def int_nvvm_tcgen05_alloc_ # cta_group : Intrinsic<[], + [llvm_ptr_ty, // dst_ptr + llvm_i32_ty] , // num_columns + [IntrConvergent, IntrInaccessibleMemOrArgMemOnly, + WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>; + + def int_nvvm_tcgen05_alloc_shared_ # cta_group : Intrinsic<[], + [llvm_shared_ptr_ty, // dst_ptr + llvm_i32_ty], // num_columns + [IntrConvergent, IntrInaccessibleMemOrArgMemOnly, + WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>; + + def int_nvvm_tcgen05_dealloc_ # cta_group : Intrinsic<[], + [llvm_tmem_ptr_ty, // tmem_addr + llvm_i32_ty], // num_columns + [IntrConvergent, IntrArgMemOnly, + NoCapture<ArgIndex<0>>]>; + + def int_nvvm_tcgen05_relinq_alloc_permit_ # cta_group : Intrinsic<[], [], + [IntrConvergent, IntrInaccessibleMemOnly]>; +} + } // let TargetPrefix = "nvvm" diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 46fcd17..053f955 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -192,7 +192,7 @@ void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &); void initializeMachineCFGPrinterPass(PassRegistry &); void initializeMachineCSELegacyPass(PassRegistry &); void initializeMachineCombinerPass(PassRegistry &); -void initializeMachineCopyPropagationPass(PassRegistry &); +void initializeMachineCopyPropagationLegacyPass(PassRegistry &); void initializeMachineCycleInfoPrinterPassPass(PassRegistry &); void initializeMachineCycleInfoWrapperPassPass(PassRegistry &); void initializeMachineDominanceFrontierPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 9681368..2e89875 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -45,6 +45,7 @@ #include "llvm/CodeGen/LowerEmuTLS.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineCSE.h" +#include "llvm/CodeGen/MachineCopyPropagation.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineLICM.h" #include "llvm/CodeGen/MachineModuleInfo.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 1d978f2..3519910 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -140,6 +140,7 @@ MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass()) MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) +MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass()) MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass()) MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass()) @@ -235,7 +236,6 @@ DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass) DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass) DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass) DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass) -DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass) DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter) DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass) DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass) diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h index 93eae39..486a396 100644 --- a/llvm/include/llvm/Support/NVPTXAddrSpace.h +++ b/llvm/include/llvm/Support/NVPTXAddrSpace.h @@ -17,17 +17,41 @@ namespace llvm { namespace NVPTXAS { + enum AddressSpace : unsigned { ADDRESS_SPACE_GENERIC = 0, ADDRESS_SPACE_GLOBAL = 1, ADDRESS_SPACE_SHARED = 3, ADDRESS_SPACE_CONST = 4, ADDRESS_SPACE_LOCAL = 5, + ADDRESS_SPACE_TENSOR = 6, ADDRESS_SPACE_PARAM = 101, }; -} // end namespace NVPTXAS +// According to official PTX Writer's Guide, DWARF debug information should +// contain DW_AT_address_class attribute for all variables and parameters. +// It's required for cuda-gdb to be able to properly reflect the memory space +// of variable address. Acceptable address class codes are listed in this enum. +// +// More detailed information: +// https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf-definitions +enum DWARF_AddressSpace : unsigned { + DWARF_ADDR_code_space = 1, + DWARF_ADDR_reg_space = 2, + DWARF_ADDR_sreg_space = 3, + DWARF_ADDR_const_space = 4, + DWARF_ADDR_global_space = 5, + DWARF_ADDR_local_space = 6, + DWARF_ADDR_param_space = 7, + DWARF_ADDR_shared_space = 8, + DWARF_ADDR_surf_space = 9, + DWARF_ADDR_tex_space = 10, + DWARF_ADDR_tex_sampler_space = 11, + DWARF_ADDR_generic_space = 12 +}; + +} // end namespace NVPTXAS } // end namespace llvm #endif // LLVM_SUPPORT_NVPTXADDRSPACE_H |