aboutsummaryrefslogtreecommitdiff
path: root/llvm/include
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/include')
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfo.h22
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h6
-rw-r--r--llvm/include/llvm/CodeGen/BasicTTIImpl.h24
-rw-r--r--llvm/include/llvm/CodeGen/MachineCopyPropagation.h35
-rw-r--r--llvm/include/llvm/CodeGen/ReachingDefAnalysis.h10
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h12
-rw-r--r--llvm/include/llvm/IR/IntrinsicsNVVM.td30
-rw-r--r--llvm/include/llvm/InitializePasses.h2
-rw-r--r--llvm/include/llvm/Passes/CodeGenPassBuilder.h1
-rw-r--r--llvm/include/llvm/Passes/MachinePassRegistry.def2
-rw-r--r--llvm/include/llvm/Support/NVPTXAddrSpace.h26
11 files changed, 162 insertions, 8 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index ee93aba..08ab4ee 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1517,6 +1517,19 @@ public:
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
+ /// \return The cost of Expand Load or Compress Store operation
+ /// \p Opcode - is a type of memory access Load or Store
+ /// \p Src - a vector type of the data to be loaded or stored
+ /// \p VariableMask - true when the memory access is predicated with a mask
+ /// that is not a compile-time constant
+ /// \p Alignment - alignment of single element
+ /// \p I - the optional original context instruction, if one exists, e.g. the
+ /// load/store to transform or the call to the gather/scatter intrinsic
+ InstructionCost getExpandCompressMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
+
/// \return The cost of strided memory operations.
/// \p Opcode - is a type of memory access Load or Store
/// \p DataTy - a vector type of the data to be loaded or stored
@@ -2228,6 +2241,9 @@ public:
bool VariableMask, Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) = 0;
+ virtual InstructionCost getExpandCompressMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind, const Instruction *I = nullptr) = 0;
virtual InstructionCost
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
@@ -2963,6 +2979,12 @@ public:
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
}
+ InstructionCost getExpandCompressMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind, const Instruction *I = nullptr) override {
+ return Impl.getExpandCompressMemoryOpCost(Opcode, DataTy, VariableMask,
+ Alignment, CostKind, I);
+ }
InstructionCost
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
bool VariableMask, Align Alignment,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index b51663a..5128c6b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -774,6 +774,12 @@ public:
return 1;
}
+ InstructionCost getExpandCompressMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const {
+ return 1;
+ }
+
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 9571bd9..a76de25 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1468,6 +1468,15 @@ public:
true, CostKind);
}
+ InstructionCost getExpandCompressMemoryOpCost(
+ unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind, const Instruction *I = nullptr) {
+ // Treat expand load/compress store as gather/scatter operation.
+ // TODO: implement more precise cost estimation for these intrinsics.
+ return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
+ /*IsGatherScatter*/ true, CostKind);
+ }
+
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
@@ -1776,6 +1785,21 @@ public:
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
VarMask, Alignment, CostKind, I);
}
+ case Intrinsic::masked_compressstore: {
+ const Value *Data = Args[0];
+ const Value *Mask = Args[2];
+ Align Alignment = I->getParamAlign(1).valueOrOne();
+ return thisT()->getExpandCompressMemoryOpCost(
+ Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
+ CostKind, I);
+ }
+ case Intrinsic::masked_expandload: {
+ const Value *Mask = Args[1];
+ Align Alignment = I->getParamAlign(0).valueOrOne();
+ return thisT()->getExpandCompressMemoryOpCost(Instruction::Load, RetTy,
+ !isa<Constant>(Mask),
+ Alignment, CostKind, I);
+ }
case Intrinsic::experimental_vp_strided_store: {
const Value *Data = Args[0];
const Value *Ptr = Args[1];
diff --git a/llvm/include/llvm/CodeGen/MachineCopyPropagation.h b/llvm/include/llvm/CodeGen/MachineCopyPropagation.h
new file mode 100644
index 0000000..2fe2646
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineCopyPropagation.h
@@ -0,0 +1,35 @@
+//===- llvm/CodeGen/MachineCopyPropagation.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINECOPYPROPAGATION_H
+#define LLVM_CODEGEN_MACHINECOPYPROPAGATION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class MachineCopyPropagationPass
+ : public PassInfoMixin<MachineCopyPropagationPass> {
+ bool UseCopyInstr;
+
+public:
+ MachineCopyPropagationPass(bool UseCopyInstr = false)
+ : UseCopyInstr(UseCopyInstr) {}
+
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+
+ MachineFunctionProperties getRequiredProperties() const {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINECOPYPROPAGATION_H
diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
index cff422f..978e84b 100644
--- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -141,12 +141,12 @@ private:
DenseMap<MachineInstr *, int> InstIds;
MBBReachingDefsInfo MBBReachingDefs;
+
+ /// MBBFrameObjsReachingDefs[{i, j}] is a list of instruction indices
+ /// (relative to begining of MBB i) that define frame index j in MBB i. This
+ /// is used in answering reaching definition queries.
using MBBFrameObjsReachingDefsInfo =
- DenseMap<unsigned, DenseMap<int, SmallVector<int>>>;
- // MBBFrameObjsReachingDefs[i][j] is a list of instruction indices (relative
- // to begining of MBB) that define frame index (j +
- // MF->getFrameInfo().getObjectIndexBegin()) in MBB i. This is used in
- // answering reaching definition queries.
+ DenseMap<std::pair<unsigned, int>, SmallVector<int>>;
MBBFrameObjsReachingDefsInfo MBBFrameObjsReachingDefs;
/// Default values are 'nothing happened a long time ago'.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9fcd2ac..04ee24c 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5622,6 +5622,18 @@ public:
// joining their results. SDValue() is returned when expansion did not happen.
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const;
+ /// Replace an extraction of a load with a narrowed load.
+ ///
+ /// \param ResultVT type of the result extraction.
+ /// \param InVecVT type of the input vector to with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad vector load that to be replaced.
+ /// \returns \p ResultVT Load on success SDValue() on failure.
+ SDValue scalarizeExtractedVectorLoad(EVT ResultVT, const SDLoc &DL,
+ EVT InVecVT, SDValue EltNo,
+ LoadSDNode *OriginalLoad,
+ SelectionDAG &DAG) const;
+
private:
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, DAGCombinerInfo &DCI) const;
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 9a2f38d..abbe25b 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -48,6 +48,7 @@
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
+def llvm_tmem_ptr_ty : LLVMQualPointerType<6>; // (tensor memory)ptr
//
// MISC
@@ -5055,4 +5056,33 @@ def int_nvvm_cp_async_bulk_prefetch_L2
def int_nvvm_griddepcontrol_launch_dependents: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
def int_nvvm_griddepcontrol_wait: Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
+//
+// Tcgen05 family of Intrinsics
+//
+
+// Tcgen05 alloc/dealloc related intrinsics
+
+foreach cta_group = ["cg1", "cg2"] in {
+ def int_nvvm_tcgen05_alloc_ # cta_group : Intrinsic<[],
+ [llvm_ptr_ty, // dst_ptr
+ llvm_i32_ty] , // num_columns
+ [IntrConvergent, IntrInaccessibleMemOrArgMemOnly,
+ WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+
+ def int_nvvm_tcgen05_alloc_shared_ # cta_group : Intrinsic<[],
+ [llvm_shared_ptr_ty, // dst_ptr
+ llvm_i32_ty], // num_columns
+ [IntrConvergent, IntrInaccessibleMemOrArgMemOnly,
+ WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
+
+ def int_nvvm_tcgen05_dealloc_ # cta_group : Intrinsic<[],
+ [llvm_tmem_ptr_ty, // tmem_addr
+ llvm_i32_ty], // num_columns
+ [IntrConvergent, IntrArgMemOnly,
+ NoCapture<ArgIndex<0>>]>;
+
+ def int_nvvm_tcgen05_relinq_alloc_permit_ # cta_group : Intrinsic<[], [],
+ [IntrConvergent, IntrInaccessibleMemOnly]>;
+}
+
} // let TargetPrefix = "nvvm"
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 46fcd17..053f955 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -192,7 +192,7 @@ void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
void initializeMachineCFGPrinterPass(PassRegistry &);
void initializeMachineCSELegacyPass(PassRegistry &);
void initializeMachineCombinerPass(PassRegistry &);
-void initializeMachineCopyPropagationPass(PassRegistry &);
+void initializeMachineCopyPropagationLegacyPass(PassRegistry &);
void initializeMachineCycleInfoPrinterPassPass(PassRegistry &);
void initializeMachineCycleInfoWrapperPassPass(PassRegistry &);
void initializeMachineDominanceFrontierPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 9681368..2e89875 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/LowerEmuTLS.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineCSE.h"
+#include "llvm/CodeGen/MachineCopyPropagation.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineLICM.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 1d978f2..3519910 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -140,6 +140,7 @@ MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
+MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass())
MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
@@ -235,7 +236,6 @@ DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass)
DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass)
DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass)
DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass)
-DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass)
DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter)
DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", MachineFunctionSplitterPass)
DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass)
diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h
index 93eae39..486a396 100644
--- a/llvm/include/llvm/Support/NVPTXAddrSpace.h
+++ b/llvm/include/llvm/Support/NVPTXAddrSpace.h
@@ -17,17 +17,41 @@
namespace llvm {
namespace NVPTXAS {
+
enum AddressSpace : unsigned {
ADDRESS_SPACE_GENERIC = 0,
ADDRESS_SPACE_GLOBAL = 1,
ADDRESS_SPACE_SHARED = 3,
ADDRESS_SPACE_CONST = 4,
ADDRESS_SPACE_LOCAL = 5,
+ ADDRESS_SPACE_TENSOR = 6,
ADDRESS_SPACE_PARAM = 101,
};
-} // end namespace NVPTXAS
+// According to official PTX Writer's Guide, DWARF debug information should
+// contain DW_AT_address_class attribute for all variables and parameters.
+// It's required for cuda-gdb to be able to properly reflect the memory space
+// of variable address. Acceptable address class codes are listed in this enum.
+//
+// More detailed information:
+// https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf-definitions
+enum DWARF_AddressSpace : unsigned {
+ DWARF_ADDR_code_space = 1,
+ DWARF_ADDR_reg_space = 2,
+ DWARF_ADDR_sreg_space = 3,
+ DWARF_ADDR_const_space = 4,
+ DWARF_ADDR_global_space = 5,
+ DWARF_ADDR_local_space = 6,
+ DWARF_ADDR_param_space = 7,
+ DWARF_ADDR_shared_space = 8,
+ DWARF_ADDR_surf_space = 9,
+ DWARF_ADDR_tex_space = 10,
+ DWARF_ADDR_tex_sampler_space = 11,
+ DWARF_ADDR_generic_space = 12
+};
+
+} // end namespace NVPTXAS
} // end namespace llvm
#endif // LLVM_SUPPORT_NVPTXADDRSPACE_H