aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXSubtarget.h')
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 34
1 files changed, 34 insertions, 0 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index e81c56b..194dbdc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -73,6 +73,18 @@ public:
const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;
+ // Checks the PTX version against both family-specific and
+ // architecture-specific SM versions. For example, sm_100{f/a} and any future
+ // variants in the same family match for any PTX version greater than or
+ // equal to `PTXVersion`.
+ bool hasPTXWithFamilySMs(unsigned PTXVersion,
+ ArrayRef<unsigned> SMVersions) const;
+ // Checks the PTX version against architecture-specific SM versions.
+ // For example, sm_100{a} matches for any PTX version greater than or equal
+ // to `PTXVersion`.
+ bool hasPTXWithAccelSMs(unsigned PTXVersion,
+ ArrayRef<unsigned> SMVersions) const;
+
bool has256BitVectorLoadStore(unsigned AS) const {
return SmVersion >= 100 && PTXVersion >= 88 &&
AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
@@ -127,6 +139,27 @@ public:
return HasTcgen05 && PTXVersion >= MinPTXVersion;
}
+ // Checks support for the following instructions:
+ // - tcgen05.ld/st
+ // - tcgen05.alloc/dealloc/relinquish
+ // - tcgen05.cp
+ // - tcgen05.fence/wait
+ // - tcgen05.commit
+ bool hasTcgen05InstSupport() const {
+ // sm_101 was renamed to sm_110 in PTX 9.0: 110 is listed at PTX 90, 101 below.
+ return hasPTXWithFamilySMs(90, {100, 110}) ||
+ hasPTXWithFamilySMs(88, {100, 101}) ||
+ hasPTXWithAccelSMs(86, {100, 101});
+ }
+
+ // Checks support for the tcgen05.shift instruction.
+ bool hasTcgen05ShiftSupport() const {
+ // sm_101 was renamed to sm_110 in PTX 9.0: 110 is listed at PTX 90, 101 below.
+ return hasPTXWithAccelSMs(90, {100, 110, 103}) ||
+ hasPTXWithAccelSMs(88, {100, 101, 103}) ||
+ hasPTXWithAccelSMs(86, {100, 101});
+ }
+
bool hasTcgen05MMAScaleInputDImm() const {
return FullSmVersion == 1003 && PTXVersion >= 86; // exact FullSmVersion match, not a >= range
}
@@ -158,6 +191,7 @@ public:
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; } // integer division: drops the last decimal digit
+ unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; } // integer division: drops the last two decimal digits
// GPUs with "a" suffix have architecture-accelerated features that are
// supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasArchAccelFeatures() allows distinguishing