diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/dag-divergence.ll | 30 |
2 files changed, 34 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0028fe1..749a361 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9206,10 +9206,10 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode * N,
     }
     break;
   case ISD::LOAD: {
-    const LoadSDNode *L = dyn_cast<LoadSDNode>(N);
-    // FIXME: Also needs to handle flat.
-    if (L->getMemOperand()->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS)
-      return true;
+    const LoadSDNode *L = cast<LoadSDNode>(N);
+    unsigned AS = L->getAddressSpace();
+    // A flat load may access private memory.
+    return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
   } break;
   case ISD::CALLSEQ_END:
     return true;
diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll
new file mode 100644
index 0000000..6694fda
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dag-divergence.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}private_load_maybe_divergent:
+; GCN: buffer_load_dword
+; GCN-NOT: s_load_dword s
+; GCN: flat_load_dword
+; GCN-NOT: s_load_dword s
+define amdgpu_kernel void @private_load_maybe_divergent(i32 addrspace(4)* %k, i32* %flat) {
+  %load = load volatile i32, i32 addrspace(5)* undef, align 4
+  %gep = getelementptr inbounds i32, i32 addrspace(4)* %k, i32 %load
+  %maybe.not.uniform.load = load i32, i32 addrspace(4)* %gep, align 4
+  store i32 %maybe.not.uniform.load, i32 addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}flat_load_maybe_divergent:
+; GCN: s_load_dwordx4
+; GCN-NOT: s_load
+; GCN: flat_load_dword
+; GCN-NOT: s_load
+; GCN: flat_load_dword
+; GCN-NOT: s_load
+; GCN: flat_store_dword
+define amdgpu_kernel void @flat_load_maybe_divergent(i32 addrspace(4)* %k, i32* %flat) {
+  %load = load i32, i32* %flat, align 4
+  %gep = getelementptr inbounds i32, i32 addrspace(4)* %k, i32 %load
+  %maybe.not.uniform.load = load i32, i32 addrspace(4)* %gep, align 4
+  store i32 %maybe.not.uniform.load, i32 addrspace(1)* undef
+  ret void
+}
