Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll                      |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fdiv.f64.ll                                     |   7
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fmed3.bf16.ll                                   |   4
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fmed3.ll                                        |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll                      |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fneg-combines.ll                                |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/frem.ll                                         |  11
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll                                    |   3
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll                                   |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/inline-attr.ll                                  |  16
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.add.min.max.ll                      | 191
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.exp2.ll                                    |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.log2.ll                                    |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/minmax.ll                                       |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/stackguard.ll                                   |  14
16 files changed, 232 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index 57a1e4c..ec92edb 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -3385,7 +3385,7 @@ declare half @llvm.canonicalize.f16(half)
 declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>)
 
 attributes #0 = { nounwind "amdgpu-ieee"="false" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" "no-nans-fp-math"="true" }
+attributes #1 = { nounwind "no-nans-fp-math"="true" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX11NONANS-FAKE16: {{.*}}
 ; GFX11NONANS-TRUE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
index acb32d4..11476a6 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f64.ll
@@ -127,7 +127,7 @@ define amdgpu_kernel void @s_fdiv_v4f64(ptr addrspace(1) %out, <4 x double> %num
 ; GCN-LABEL: {{^}}div_fast_2_x_pat_f64:
 ; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0.5
 ; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_2_x_pat_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @div_fast_2_x_pat_f64(ptr addrspace(1) %out) #0 {
   %x = load double, ptr addrspace(1) poison
   %rcp = fdiv fast double %x, 2.0
   store double %rcp, ptr addrspace(1) %out, align 4
@@ -139,7 +139,7 @@ define amdgpu_kernel void @div_fast_2_x_pat_f64(ptr addrspace(1) %out) #1 {
 ; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0x3fb99999
 ; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
 ; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_k_x_pat_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @div_fast_k_x_pat_f64(ptr addrspace(1) %out) #0 {
   %x = load double, ptr addrspace(1) poison
   %rcp = fdiv fast double %x, 10.0
   store double %rcp, ptr addrspace(1) %out, align 4
@@ -151,7 +151,7 @@ define amdgpu_kernel void @div_fast_k_x_pat_f64(ptr addrspace(1) %out) #1 {
 ; GCN-DAG: v_mov_b32_e32 v[[K_HI:[0-9]+]], 0xbfb99999
 ; GCN: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, v[[[K_LO]]:[[K_HI]]]
 ; GCN: buffer_store_dwordx2 [[MUL]]
-define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(ptr addrspace(1) %out) #0 {
   %x = load double, ptr addrspace(1) poison
   %rcp = fdiv fast double %x, -10.0
   store double %rcp, ptr addrspace(1) %out, align 4
@@ -159,4 +159,3 @@ define amdgpu_kernel void @div_fast_neg_k_x_pat_f64(ptr addrspace(1) %out) #1 {
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll b/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
index 92eb4a6..0a266bc 100644
--- a/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
@@ -284,4 +284,4 @@ define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <
   ret <2 x float> %tmp1
 }
 
-attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { "no-infs-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.bf16.ll b/llvm/test/CodeGen/AMDGPU/fmed3.bf16.ll
index bc85dc2..3e513de 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.bf16.ll
@@ -219,8 +219,8 @@ define <2 x bfloat> @v_test_fmed3_r_i_i_v2bf16_minimumnum_maximumnum(<2 x bfloat
 }
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
-attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
+attributes #1 = { nounwind "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "no-nans-fp-math"="true" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX11: {{.*}}
 ; GFX11-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 3145a27..60ac0b9 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -8905,4 +8905,4 @@ declare half @llvm.minnum.f16(half, half) #0
 declare half @llvm.maxnum.f16(half, half) #0
 
 attributes #0 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
+attributes #2 = { nounwind "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
index d8bbda1..69d1ee3f 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
@@ -159,7 +159,7 @@ declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
 
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind }
 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
 attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index aaea4f7..b3202cb 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -8006,7 +8006,7 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
 
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind }
 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN-NSZ: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index 6f91222..d8cbdb1 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -2048,7 +2048,7 @@ define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(
 ; GFX1200-FAKE16-NEXT: v_fmac_f16_e32 v1, v3, v2
 ; GFX1200-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
 ; GFX1200-FAKE16-NEXT: s_endpgm
-                                       ptr addrspace(1) %in2) #1 {
+                                       ptr addrspace(1) %in2) #0 {
   %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
   %r0 = load half, ptr addrspace(1) %in1, align 4
   %r1 = load half, ptr addrspace(1) %gep2, align 4
@@ -3417,7 +3417,7 @@ define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(
 ; GFX1200-NEXT: v_fmac_f32_e32 v1, v3, v2
 ; GFX1200-NEXT: global_store_b32 v0, v1, s[0:1]
 ; GFX1200-NEXT: s_endpgm
-                                       ptr addrspace(1) %in2) #1 {
+                                       ptr addrspace(1) %in2) #0 {
   %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
   %r0 = load float, ptr addrspace(1) %in1, align 4
   %r1 = load float, ptr addrspace(1) %gep2, align 4
@@ -4821,7 +4821,7 @@ define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(
 ; GFX1200-NEXT: v_fma_f64 v[0:1], -v[4:5], v[2:3], v[0:1]
 ; GFX1200-NEXT: global_store_b64 v12, v[0:1], s[0:1]
 ; GFX1200-NEXT: s_endpgm
-                                       ptr addrspace(1) %in2) #1 {
+                                       ptr addrspace(1) %in2) #0 {
   %r0 = load double, ptr addrspace(1) %in1, align 8
   %r1 = load double, ptr addrspace(1) %in2, align 8
   %r2 = frem afn double %r0, %r1
@@ -18918,7 +18918,4 @@ define amdgpu_kernel void @frem_v2f64_const(ptr addrspace(1) %out) #0 {
 
-attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-attributes #1 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
-
-
+attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 1b74ddf..9b97981 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -2870,7 +2870,7 @@ define double @v_sqrt_f64__enough_unsafe_attrs(double %x) #3 {
   ret double %result
 }
 
-define double @v_sqrt_f64__unsafe_attr(double %x) #4 {
+define double @v_sqrt_f64__unsafe_attr(double %x) {
 ; GFX6-SDAG-LABEL: v_sqrt_f64__unsafe_attr:
 ; GFX6-SDAG: ; %bb.0:
 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3449,7 +3449,6 @@ declare i32 @llvm.amdgcn.readfirstlane(i32) #1
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #1 = { convergent nounwind willreturn memory(none) }
 attributes #3 = { "no-nans-fp-math"="true" "no-infs-fp-math"="true" }
-attributes #4 = { "unsafe-fp-math"="true" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX6: {{.*}}
 ; GFX8: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll
index 9f19bcb..c93c077 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll
@@ -239,4 +239,4 @@ declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) #0
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) #0
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind "unsafe-fp-math"="true" }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
index 4e93eca..c33b3344 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -36,18 +36,18 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true"}
-attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "uniform-work-group-size"="false"}
+attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" }
 
 ;.
-; UNSAFE: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
-; UNSAFE: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
+; UNSAFE: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "uniform-work-group-size"="false" }
 ;.
-; NONANS: attributes #[[ATTR0]] = { nounwind "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
-; NONANS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
+; NONANS: attributes #[[ATTR0]] = { nounwind "no-nans-fp-math"="true" "uniform-work-group-size"="false" }
+; NONANS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "uniform-work-group-size"="false" }
 ;.
-; NOINFS: attributes #[[ATTR0]] = { nounwind "no-infs-fp-math"="true" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
-; NOINFS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "uniform-work-group-size"="false" "unsafe-fp-math"="true" }
+; NOINFS: attributes #[[ATTR0]] = { nounwind "no-infs-fp-math"="true" "uniform-work-group-size"="false" }
+; NOINFS: attributes #[[ATTR1]] = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "uniform-work-group-size"="false" }
 ;.
 ; UNSAFE: [[META0]] = !{}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.add.min.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.add.min.max.ll
new file mode 100644
index 0000000..99421d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.add.min.max.ll
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250-GISEL %s
+
+declare i32 @llvm.amdgcn.add.min.i32(i32, i32, i32, i1)
+declare i32 @llvm.amdgcn.add.max.i32(i32, i32, i32, i1)
+declare i32 @llvm.amdgcn.add.min.u32(i32, i32, i32, i1)
+declare i32 @llvm.amdgcn.add.max.u32(i32, i32, i32, i1)
+declare <2 x i16> @llvm.amdgcn.pk.add.min.i16(<2 x i16>, <2 x i16>, <2 x i16>, i1)
+declare <2 x i16> @llvm.amdgcn.pk.add.max.i16(<2 x i16>, <2 x i16>, <2 x i16>, i1)
+declare <2 x i16> @llvm.amdgcn.pk.add.min.u16(<2 x i16>, <2 x i16>, <2 x i16>, i1)
+declare <2 x i16> @llvm.amdgcn.pk.add.max.u16(<2 x i16>, <2 x i16>, <2 x i16>, i1)
+
+define i32 @test_add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: test_add_min_i32_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.min.i32(i32 %a, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_add_min_i32_ssi_clamp(i32 inreg %a, i32 inreg %b) {
+; GCN-LABEL: test_add_min_i32_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_min_i32 v0, s0, s1, 1 clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.min.i32(i32 %a, i32 %b, i32 1, i1 1)
+  ret i32 %ret
+}
+
+define i32 @test_add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: test_add_min_u32_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.min.u32(i32 %a, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_add_min_u32_ssi_clamp(i32 inreg %a, i32 inreg %b) {
+; GCN-LABEL: test_add_min_u32_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_min_u32 v0, s0, s1, 1 clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.min.u32(i32 %a, i32 %b, i32 1, i1 1)
+  ret i32 %ret
+}
+
+define i32 @test_add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: test_add_max_i32_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.max.i32(i32 %a, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_add_max_i32_ssi_clamp(i32 inreg %a, i32 inreg %b) {
+; GCN-LABEL: test_add_max_i32_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_max_i32 v0, s0, s1, 1 clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.max.i32(i32 %a, i32 %b, i32 1, i1 1)
+  ret i32 %ret
+}
+
+define i32 @test_add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: test_add_max_u32_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.max.u32(i32 %a, i32 %b, i32 %c, i1 0)
+  ret i32 %ret
+}
+
+define i32 @test_add_max_u32_ssi_clamp(i32 inreg %a, i32 inreg %b) {
+; GCN-LABEL: test_add_max_u32_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_add_max_u32 v0, s0, s1, 1 clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call i32 @llvm.amdgcn.add.max.u32(i32 %a, i32 %b, i32 1, i1 1)
+  ret i32 %ret
+}
+
+define <2 x i16> @test_add_min_i16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GCN-LABEL: test_add_min_i16_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_min_i16 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.min.i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, i1 0)
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @test_add_min_i16_ssi_clamp(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GCN-LABEL: test_add_min_i16_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_min_i16 v0, s0, s1, 1 op_sel_hi:[1,1,0] clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.min.i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> <i16 1, i16 1>, i1 1)
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @test_add_min_u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GCN-LABEL: test_add_min_u16_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_min_u16 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.min.u16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, i1 0)
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @test_add_min_u16_ssi_clamp(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GCN-LABEL: test_add_min_u16_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_min_u16 v0, s0, s1, 1 op_sel_hi:[1,1,0] clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.min.u16(<2 x i16> %a, <2 x i16> %b, <2 x i16> <i16 1, i16 1>, i1 1)
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @test_add_max_i16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GCN-LABEL: test_add_max_i16_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_max_i16 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.max.i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, i1 0)
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @test_add_max_i16_ssi_clamp(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GCN-LABEL: test_add_max_i16_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_max_i16 v0, s0, s1, 1 op_sel_hi:[1,1,0] clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.max.i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> <i16 1, i16 1>, i1 1)
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @test_add_max_u16_vvv(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
+; GCN-LABEL: test_add_max_u16_vvv:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_max_u16 v0, v0, v1, v2
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.max.u16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, i1 0)
+  ret <2 x i16> %ret
+}
+
+define <2 x i16> @test_add_max_u16_ssi_clamp(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GCN-LABEL: test_add_max_u16_ssi_clamp:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: v_pk_add_max_u16 v0, s0, s1, 1 op_sel_hi:[1,1,0] clamp
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+  %ret = tail call <2 x i16> @llvm.amdgcn.pk.add.max.u16(<2 x i16> %a, <2 x i16> %b, <2 x i16> <i16 1, i16 1>, i1 1)
+  ret <2 x i16> %ret
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX1250-GISEL: {{.*}}
+; GFX1250-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
index 883db20..e30a586 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
@@ -1485,7 +1485,7 @@ define float @v_exp2_f32_fast(float %in) {
   ret float %result
 }
 
-define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
+define float @v_exp2_f32_unsafe_math_attr(float %in) {
 ; SI-SDAG-LABEL: v_exp2_f32_unsafe_math_attr:
 ; SI-SDAG: ; %bb.0:
 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index 0854134..61a777f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -1907,7 +1907,7 @@ define float @v_log2_f32_fast(float %in) {
   ret float %result
 }
 
-define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
+define float @v_log2_f32_unsafe_math_attr(float %in) {
 ; SI-SDAG-LABEL: v_log2_f32_unsafe_math_attr:
 ; SI-SDAG: ; %bb.0:
 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index d578d2e..60570bd 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -1296,4 +1296,4 @@ declare half @llvm.minnum.f16(half, half)
 declare half @llvm.maxnum.f16(half, half)
 declare float @llvm.minnum.f32(float, float)
 declare float @llvm.maxnum.f32(float, float)
-attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
+attributes #0 = { nounwind "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/stackguard.ll b/llvm/test/CodeGen/AMDGPU/stackguard.ll
new file mode 100644
index 0000000..393686f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/stackguard.ll
@@ -0,0 +1,14 @@
+; RUN: not llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s
+; RUN: not llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck %s
+
+; FIXME: To actually support stackguard, need to fix intrinsic to
+; return pointer in any address space.
+
+; CHECK: error: unable to lower stackguard
+define i1 @test_stackguard(ptr %p1) {
+  %p2 = call ptr @llvm.stackguard()
+  %res = icmp ne ptr %p2, %p1
+  ret i1 %res
+}
+
+declare ptr @llvm.stackguard()