about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jay Foad <jay.foad@amd.com> 2024-06-25 17:43:00 +0100
committer: GitHub <noreply@github.com> 2024-06-25 17:43:00 +0100
commit: aaf50bf34f3a2007221c644384d238666cfc2bc3 (patch)
tree: da24897dcafde246d6b356471e5dc63a311f2868
parent: de7c1396f29b9bf7011912e7cfea9edad1efb492 (diff)
download: llvm-aaf50bf34f3a2007221c644384d238666cfc2bc3.zip
download: llvm-aaf50bf34f3a2007221c644384d238666cfc2bc3.tar.gz
download: llvm-aaf50bf34f3a2007221c644384d238666cfc2bc3.tar.bz2
[AMDGPU] Disallow negative s_load offsets in isLegalAddressingMode (#91327)
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10
-rw-r--r-- llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll | 89
2 files changed, 72 insertions, 27 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 35774e4..b8ff5ed 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1610,6 +1610,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
}
+ if ((AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ AM.BaseOffs < 0) {
+ // Scalar (non-buffer) loads can only use a negative offset if
+ // soffset+offset is non-negative. Since the compiler can only prove that
+ // in a few special cases, it is safer to claim that negative offsets are
+ // not supported.
+ return false;
+ }
+
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
index 41d2360..c7f7f30 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
@@ -279,33 +279,19 @@ end:
}
define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
-; GFX678-LABEL: test_sink_smem_offset_neg400:
-; GFX678: ; %bb.0: ; %entry
-; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70
-; GFX678-NEXT: s_addc_u32 s1, s1, -1
-; GFX678-NEXT: .LBB5_1: ; %loop
-; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX678-NEXT: s_waitcnt lgkmcnt(0)
-; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0
-; GFX678-NEXT: s_add_i32 s2, s2, -1
-; GFX678-NEXT: s_cmp_lg_u32 s2, 0
-; GFX678-NEXT: s_cbranch_scc1 .LBB5_1
-; GFX678-NEXT: ; %bb.2: ; %end
-; GFX678-NEXT: s_endpgm
-;
-; GFX9-LABEL: test_sink_smem_offset_neg400:
-; GFX9: ; %bb.0: ; %entry
-; GFX9-NEXT: .LBB5_1: ; %loop
-; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT: s_add_i32 s2, s2, -1
-; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70
-; GFX9-NEXT: s_addc_u32 s5, s1, -1
-; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0
-; GFX9-NEXT: s_cmp_lg_u32 s2, 0
-; GFX9-NEXT: s_cbranch_scc1 .LBB5_1
-; GFX9-NEXT: ; %bb.2: ; %end
-; GFX9-NEXT: s_endpgm
+; GFX6789-LABEL: test_sink_smem_offset_neg400:
+; GFX6789: ; %bb.0: ; %entry
+; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
+; GFX6789-NEXT: s_addc_u32 s1, s1, -1
+; GFX6789-NEXT: .LBB5_1: ; %loop
+; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
+; GFX6789-NEXT: s_add_i32 s2, s2, -1
+; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
+; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
+; GFX6789-NEXT: ; %bb.2: ; %end
+; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12: ; %bb.0: ; %entry
@@ -337,3 +323,52 @@ loop:
end:
ret void
}
+
+; Same for address space 6, constant 32-bit.
+define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
+; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
+; GFX6789: ; %bb.0: ; %entry
+; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70
+; GFX6789-NEXT: s_mov_b32 s3, 0
+; GFX6789-NEXT: .LBB6_1: ; %loop
+; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0
+; GFX6789-NEXT: s_add_i32 s1, s1, -1
+; GFX6789-NEXT: s_cmp_lg_u32 s1, 0
+; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1
+; GFX6789-NEXT: ; %bb.2: ; %end
+; GFX6789-NEXT: s_endpgm
+;
+; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70
+; GFX12-NEXT: s_mov_b32 s3, 0
+; GFX12-NEXT: .LBB6_1: ; %loop
+; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX12-NEXT: s_add_co_i32 s1, s1, -1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: s_cmp_lg_u32 s1, 0
+; GFX12-NEXT: s_cbranch_scc1 .LBB6_1
+; GFX12-NEXT: ; %bb.2: ; %end
+; GFX12-NEXT: s_endpgm
+entry:
+ %gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
+ br label %loop
+
+loop:
+ %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
+ %dec = sub i32 %count, 1
+ %load = load volatile i32, ptr addrspace(6) %gep
+ %cond = icmp eq i32 %dec, 0
+ br i1 %cond, label %end, label %loop
+
+end:
+ ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX678: {{.*}}
+; GFX9: {{.*}}