diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-03-13 19:47:31 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-03-13 19:47:31 +0000 |
commit | 971c85ebb4416b5bf8c5c878ff49fc773f04c664 (patch) | |
tree | 404acbd2e72408b7bf3223da971875c8ae50a6c1 | |
parent | 3978b877d7773c77577353239158941e208bbfea (diff) | |
download | llvm-971c85ebb4416b5bf8c5c878ff49fc773f04c664.zip llvm-971c85ebb4416b5bf8c5c878ff49fc773f04c664.tar.gz llvm-971c85ebb4416b5bf8c5c878ff49fc773f04c664.tar.bz2 |
AMDGPU: Treat 0 as private null pointer in addrspacecast lowering
llvm-svn: 297658
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 1 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 21 |
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 14 |
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/nullptr.ll | 4 |
4 files changed, 23 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index bf0b954..e36e940 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -62,7 +62,6 @@ public: /// Get the integer value of a null pointer in the given address space. uint64_t getNullPointerValue(unsigned AddrSpace) const { switch(AddrSpace) { - case AMDGPUAS::PRIVATE_ADDRESS: case AMDGPUAS::LOCAL_ADDRESS: case AMDGPUAS::REGION_ADDRESS: return -1; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index f6a4dc5..8a621952 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2374,21 +2374,25 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, MachineMemOperand::MOInvariant); } +// FIXME: Really support non-0 null pointers. +static int getSegmentNullPtrValue(unsigned AS) { + return AS == AMDGPUAS::LOCAL_ADDRESS ? -1 : 0; +} + SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op); SDValue Src = ASC->getOperand(0); - - // FIXME: Really support non-0 null pointers. 
- SDValue SegmentNullPtr = DAG.getConstant(-1, SL, MVT::i32); SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64); // flat -> local/private if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { - if (ASC->getDestAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - ASC->getDestAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { + unsigned DestAS = ASC->getDestAddressSpace(); + if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) { + SDValue SegmentNullPtr + = DAG.getConstant(getSegmentNullPtrValue(DestAS), SL, MVT::i32); SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE); SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src); @@ -2399,8 +2403,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, // local/private -> flat if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { - if (ASC->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - ASC->getSrcAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) { + unsigned SrcAS = ASC->getSrcAddressSpace(); + if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) { + SDValue SegmentNullPtr + = DAG.getConstant(getSegmentNullPtrValue(SrcAS), SL, MVT::i32); + SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE); diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index 12dcda9..b6ada5e 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -49,7 +49,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { ; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] -; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 +; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0 ; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]] ; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 @@ -116,7 +116,7 @@ define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 { ; HSA: 
s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}} ; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]] -; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]] +; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]] ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}} ; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} define void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 { @@ -195,14 +195,14 @@ define void @cast_neg1_flat_to_group_addrspacecast() #0 { ret void } +; FIXME: Shouldn't need to enable queue ptr ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast: -; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11 -; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] - -; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_private_base +; CI: enable_sgpr_queue_ptr = 1 +; GFX9: enable_sgpr_queue_ptr = 0 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} +; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define void @cast_0_private_to_flat_addrspacecast() #0 { %cast = addrspacecast i32* null to i32 addrspace(4)* @@ -211,7 +211,7 @@ define void @cast_0_private_to_flat_addrspacecast() #0 { } ; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast: -; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}} +; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}} ; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen define void @cast_0_flat_to_private_addrspacecast() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/nullptr.ll b/llvm/test/CodeGen/AMDGPU/nullptr.ll index b4bdf71..0df16da 100644 --- a/llvm/test/CodeGen/AMDGPU/nullptr.ll +++ b/llvm/test/CodeGen/AMDGPU/nullptr.ll @@ -3,7 +3,7 @@ %struct.S = type { i32*, i32 addrspace(1)*, i32 addrspace(2)*, i32 
addrspace(3)*, i32 addrspace(4)*, i32 addrspace(5)*} ; CHECK-LABEL: nullptr_priv: -; CHECK-NEXT: .long -1 +; CHECK-NEXT: .long 0 @nullptr_priv = global i32* addrspacecast (i32 addrspace(4)* null to i32*) ; CHECK-LABEL: nullptr_glob: @@ -95,7 +95,7 @@ @nullptr23 = global i32 addrspace(23)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(23)*) ; CHECK-LABEL: structWithPointers: -; CHECK-NEXT: .long -1 +; CHECK-NEXT: .long 0 ; CHECK-NEXT: .zero 4 ; CHECK-NEXT: .quad 0 ; CHECK-NEXT: .quad 0 |