diff options
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXSubtarget.h')
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index e81c56b..194dbdc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -73,6 +73,18 @@ public: const SelectionDAGTargetInfo *getSelectionDAGInfo() const override; + // Checks PTX version and family-specific and architecture-specific SM + // versions. For example, sm_100{f/a} and any future variants in the same + // family will match for any PTX version greater than or equal to + // `PTXVersion`. + bool hasPTXWithFamilySMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const; + // Checks PTX version and architecture-specific SM versions. + // For example, sm_100{a} will match for any PTX version greater than or equal + // to `PTXVersion`. + bool hasPTXWithAccelSMs(unsigned PTXVersion, + ArrayRef<unsigned> SMVersions) const; + bool has256BitVectorLoadStore(unsigned AS) const { return SmVersion >= 100 && PTXVersion >= 88 && AS == NVPTXAS::ADDRESS_SPACE_GLOBAL; @@ -127,6 +139,27 @@ public: return HasTcgen05 && PTXVersion >= MinPTXVersion; } + // Checks support for the following instructions: + // - tcgen05.ld/st + // - tcgen05.alloc/dealloc/relinquish + // - tcgen05.cp + // - tcgen05.fence/wait + // - tcgen05.commit + bool hasTcgen05InstSupport() const { + // sm_101 was renamed to sm_110 in PTX 9.0. + return hasPTXWithFamilySMs(90, {100, 110}) || + hasPTXWithFamilySMs(88, {100, 101}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + + // Checks support for the tcgen05.shift instruction. 
+ bool hasTcgen05ShiftSupport() const { + // sm_101 was renamed to sm_110 in PTX 9.0. + return hasPTXWithAccelSMs(90, {100, 110, 103}) || + hasPTXWithAccelSMs(88, {100, 101, 103}) || + hasPTXWithAccelSMs(86, {100, 101}); + } + bool hasTcgen05MMAScaleInputDImm() const { return FullSmVersion == 1003 && PTXVersion >= 86; } @@ -158,6 +191,7 @@ public: bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; } unsigned int getFullSmVersion() const { return FullSmVersion; } unsigned int getSmVersion() const { return getFullSmVersion() / 10; } + unsigned int getSmFamilyVersion() const { return getFullSmVersion() / 100; } // GPUs with "a" suffix have architecture-accelerated features that are // supported on the specified architecture only, hence such targets do not // follow the onion layer model. hasArchAccelFeatures() allows distinguishing |