diff options
author | Rodrigo Dominguez <Rodrigo.Dominguez@amd.com> | 2021-03-30 13:53:17 -0400 |
---|---|---|
committer | Jay Foad <jay.foad@amd.com> | 2022-06-23 14:20:33 +0100 |
commit | 971fa4b1961bcf8013481d0fe1d4864d7adda0f8 (patch) | |
tree | 075ce7f74bee93008141325fc5e02591ea8bed20 | |
parent | 74c3f9c1914eb85314fe0f4fd631b4ba32f17b2c (diff) | |
download | llvm-971fa4b1961bcf8013481d0fe1d4864d7adda0f8.zip llvm-971fa4b1961bcf8013481d0fe1d4864d7adda0f8.tar.gz llvm-971fa4b1961bcf8013481d0fe1d4864d7adda0f8.tar.bz2 |
[AMDGPU] GFX11: remove ShaderType from ds_ordered_count offset field
On GFX11 the ShaderType is determined by the hardware and must no longer
be written into bits [3:2] of offset1 (i.e. bits [11:10] of the combined
ds_ordered_count offset field).
Differential Revision: https://reviews.llvm.org/D128196
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll | 6 |
3 files changed, 11 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 1893269..8fe735c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1328,12 +1328,14 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
 
   unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF);
   unsigned Offset0 = OrderedCountIndex << 2;
-  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
-                     (Instruction << 4);
+  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
 
   if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
     Offset1 |= (CountDw - 1) << 6;
 
+  if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
+    Offset1 |= ShaderType << 2;
+
   unsigned Offset = Offset0 | (Offset1 << 8);
 
   Register M0Val = MI.getOperand(2).getReg();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 8f17e3a..887a086 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7113,12 +7113,14 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     unsigned ShaderType =
         SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction());
     unsigned Offset0 = OrderedCountIndex << 2;
-    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
-                       (Instruction << 4);
+    unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
 
     if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
       Offset1 |= (CountDw - 1) << 6;
 
+    if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX11)
+      Offset1 |= ShaderType << 2;
+
     unsigned Offset = Offset0 | (Offset1 << 8);
 
     SDValue Ops[] = {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
index 55204c9..01837b3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
@@ -25,7 +25,7 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
 ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
   %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
@@ -36,7 +36,7 @@ define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
 ; FUNC-LABEL: {{^}}ds_ordered_add_vs:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
   %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
@@ -47,7 +47,7 @@ define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
 ; FUNC-LABEL: {{^}}ds_ordered_add_gs:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0
-; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
   %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)