diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/GCNSubtarget.h')
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNSubtarget.h | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 5530886..f47ddf5 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -187,6 +187,7 @@ protected: bool HasFlatBufferGlobalAtomicFaddF64Inst = false; bool HasDefaultComponentZero = false; bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false; + bool HasEmulatedSystemScopeAtomics = false; bool HasDefaultComponentBroadcast = false; bool HasXF32Insts = false; /// The maximum number of instructions that may be placed within an S_CLAUSE, @@ -950,6 +951,12 @@ public: return HasAgentScopeFineGrainedRemoteMemoryAtomics; } + /// \return true if HW emulates system scope atomics unsupported by the PCI-e + /// via CAS loop. + bool hasEmulatedSystemScopeAtomics() const { + return HasEmulatedSystemScopeAtomics; + } + bool hasDefaultComponentZero() const { return HasDefaultComponentZero; } bool hasDefaultComponentBroadcast() const { @@ -1081,7 +1088,7 @@ public: } bool hasLDSFPAtomicAddF32() const { return GFX8Insts; } - bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; } + bool hasLDSFPAtomicAddF64() const { return GFX90AInsts || GFX1250Insts; } /// \returns true if the subtarget has the v_permlanex16_b32 instruction. bool hasPermLaneX16() const { return getGeneration() >= GFX10; } @@ -1555,12 +1562,16 @@ public: // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions. bool hasPkMinMax3Insts() const { return GFX1250Insts; } + // \returns true if target has S_GET_SHADER_CYCLES_U64 instruction. + bool hasSGetShaderCyclesInst() const { return GFX1250Insts; } + // \returns true if target has S_SETPRIO_INC_WG instruction. bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; } // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead - // of sign-extending. - bool hasGetPCZeroExtension() const { return GFX12Insts; } + // of sign-extending. 
Note that GFX1250 has not only fixed the bug but also + // extended VA to 57 bits. + bool hasGetPCZeroExtension() const { return GFX12Insts && !GFX1250Insts; } /// \returns SGPR allocation granularity supported by the subtarget. unsigned getSGPRAllocGranule() const { |