; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: not llc < %s -mcpu=sm_90 -mattr=+ptx82 2>&1 | FileCheck %s --check-prefix=ERROR
; RUN: not llc < %s -mcpu=sm_80 -mattr=+ptx84 2>&1 | FileCheck %s --check-prefix=ERROR
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx84 | FileCheck %s --check-prefix=CHECK
; RUN: %if ptxas-sm_90 && ptxas-isa-8.4 %{ llc < %s -mcpu=sm_90 -mattr=+ptx84 | %ptxas-verify -arch=sm_90 %}

;; Tests NVPTX lowering of i128 atomicrmw and cmpxchg instructions.
;; The first two run lines (sm_90 with ptx82, sm_80 with ptx84) expect llc to
;; fail and only match the diagnostics listed below. The third run line
;; (sm_90 with ptx84) matches the generated PTX, and the last one passes the
;; same output through ptxas when the lit features for it are available.

;; TODO: Update cmpxchg.py so that it can automatically generate the IR for
;; these test cases.

target triple = "nvptx64-nvidia-cuda"

;; Check that the first couple of error messages are correct.
; ERROR: error: unsupported cmpxchg
; ERROR: error: unsupported cmpxchg

;; ---------------------------------------------------------------------------
;; atomicrmw xchg, release ordering, one test per address space.
;; In every xchg test the expected PTX copies the two 64-bit halves of %amt
;; into a .b128 register, issues a single atom...exch.b128, and unpacks the
;; result into two 64-bit registers for the return value.
;; ---------------------------------------------------------------------------

;; Generic pointer: no state-space qualifier is expected on the atom.
define i128 @test_xchg_generic(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_generic(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_generic_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_generic_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt release
  ret i128 %old
}

;; addrspace(1): the .global qualifier is expected.
define i128 @test_xchg_global(ptr addrspace(1) %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_global(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_global_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_global_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.sys.global.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr addrspace(1) %addr, i128 %amt release
  ret i128 %old
}

;; addrspace(3): the .shared qualifier is expected.
define i128 @test_xchg_shared(ptr addrspace(3) %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_shared(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.sys.shared.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr addrspace(3) %addr, i128 %amt release
  ret i128 %old
}

;; addrspace(7): the .shared::cluster qualifier is expected.
define i128 @test_xchg_shared_cluster(ptr addrspace(7) %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_shared_cluster(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_shared_cluster_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_shared_cluster_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.sys.shared::cluster.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr addrspace(7) %addr, i128 %amt release
  ret i128 %old
}

;; ---------------------------------------------------------------------------
;; atomicrmw xchg, release ordering, one test per synchronization scope.
;; ---------------------------------------------------------------------------

;; syncscope("block"): the .cta scope is expected.
define i128 @test_xchg_block(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_block(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_block_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_block_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.cta.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("block") release
  ret i128 %old
}

;; syncscope("cluster"): the .cluster scope is expected.
define i128 @test_xchg_cluster(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_cluster(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_cluster_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_cluster_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.cluster.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("cluster") release
  ret i128 %old
}

;; syncscope("device"): the .gpu scope is expected.
define i128 @test_xchg_gpu(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_gpu(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_gpu_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_gpu_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.gpu.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt syncscope("device") release
  ret i128 %old
}

;; No syncscope: the .sys scope is expected. The IR body is the same as in
;; test_xchg_generic.
define i128 @test_xchg_sys(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_sys(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_sys_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_sys_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt release
  ret i128 %old
}

;; ---------------------------------------------------------------------------
;; atomicrmw xchg on a generic pointer, one test per memory ordering.
;; ---------------------------------------------------------------------------

;; monotonic: the .relaxed qualifier is expected.
define i128 @test_xchg_relaxed(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_relaxed(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_relaxed_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_relaxed_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.relaxed.sys.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt monotonic
  ret i128 %old
}

;; acquire: the .acquire qualifier is expected.
define i128 @test_xchg_acquire(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_acquire(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acquire_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acquire_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.acquire.sys.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt acquire
  ret i128 %old
}

;; release: the .release qualifier is expected. The IR body is the same as in
;; test_xchg_generic.
define i128 @test_xchg_release(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_release(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_release_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_release_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.release.sys.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt release
  ret i128 %old
}

;; acq_rel: the .acq_rel qualifier is expected.
define i128 @test_xchg_acq_rel(ptr %addr, i128 %amt) {
; CHECK-LABEL: test_xchg_acq_rel(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_xchg_acq_rel_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_xchg_acq_rel_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 amt, dst;
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd3};
; CHECK-NEXT: atom.acq_rel.sys.exch.b128 dst, [%rd1], amt;
; CHECK-NEXT: mov.b128 {%rd4, %rd5}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %old = atomicrmw xchg ptr %addr, i128 %amt acq_rel
  ret i128 %old
}

;; ---------------------------------------------------------------------------
;; cmpxchg, monotonic/monotonic, one test per address space and scope.
;; Every cmpxchg test in this file returns %new rather than the value produced
;; by the cmpxchg. Accordingly the expected st.param stores {%rd4, %rd5}, the
;; registers loaded from param_2, and the registers unpacked from dst are not
;; read again.
;; ---------------------------------------------------------------------------

;; Generic pointer: no state-space qualifier is expected on the atom.
define i128 @test_cmpxchg_generic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_generic(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_generic_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_generic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_generic_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic
  ret i128 %new
}

;; addrspace(1): the .global qualifier is expected.
define i128 @test_cmpxchg_global(ptr addrspace(1) %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_global(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_global_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_global_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_global_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.sys.global.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr addrspace(1) %addr, i128 %cmp, i128 %new monotonic monotonic
  ret i128 %new
}

;; addrspace(3): the .shared qualifier is expected.
define i128 @test_cmpxchg_shared(ptr addrspace(3) %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_shared(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.sys.shared.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr addrspace(3) %addr, i128 %cmp, i128 %new monotonic monotonic
  ret i128 %new
}

;; syncscope("block"): the .cta scope is expected.
define i128 @test_cmpxchg_block(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_block(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_block_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_block_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_block_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.cta.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("block") monotonic monotonic
  ret i128 %new
}

;; syncscope("cluster"): the .cluster scope is expected.
define i128 @test_cmpxchg_cluster(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_cluster(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_cluster_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_cluster_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_cluster_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.cluster.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("cluster") monotonic monotonic
  ret i128 %new
}

;; syncscope("device"): the .gpu scope is expected.
define i128 @test_cmpxchg_gpu(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_gpu(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_gpu_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_gpu_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_gpu_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.gpu.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new syncscope("device") monotonic monotonic
  ret i128 %new
}

;; addrspace(7): the .shared::cluster qualifier is expected.
define i128 @test_cmpxchg_shared_cluster(ptr addrspace(7) %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_shared_cluster(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_shared_cluster_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_shared_cluster_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_shared_cluster_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.sys.shared::cluster.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr addrspace(7) %addr, i128 %cmp, i128 %new monotonic monotonic
  ret i128 %new
}

;; ---------------------------------------------------------------------------
;; cmpxchg on a generic pointer, one test per ordering pair. The two ordering
;; names in each function name appear in the same order as the two orderings
;; on the cmpxchg instruction. In the expected output below, every test with a
;; seq_cst ordering in either position has a fence.sc.sys before the operand
;; loads and uses atom.acquire for the compare-and-swap itself.
;; ---------------------------------------------------------------------------

define i128 @test_cmpxchg_monotonic_monotonic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_monotonic_monotonic(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_monotonic_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_monotonic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_monotonic_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic monotonic
  ret i128 %new
}

define i128 @test_cmpxchg_monotonic_acquire(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_monotonic_acquire(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_acquire_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_acquire_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_acquire_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic acquire
  ret i128 %new
}

define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_monotonic_seq_cst(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new monotonic seq_cst
  ret i128 %new
}

define i128 @test_cmpxchg_acquire_monotonic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_acquire_monotonic(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_monotonic_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_monotonic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_monotonic_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire monotonic
  ret i128 %new
}

define i128 @test_cmpxchg_acquire_acquire(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_acquire_acquire(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_acquire_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_acquire_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_acquire_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire acquire
  ret i128 %new
}

define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_acquire_seq_cst(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acquire seq_cst
  ret i128 %new
}

define i128 @test_cmpxchg_release_monotonic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_release_monotonic(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_monotonic_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_monotonic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_monotonic_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.release.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release monotonic
  ret i128 %new
}

;; release/acquire: the expected atom uses the combined .acq_rel qualifier.
define i128 @test_cmpxchg_release_acquire(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_release_acquire(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_acquire_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_acquire_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_acquire_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release acquire
  ret i128 %new
}

define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_release_seq_cst(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new release seq_cst
  ret i128 %new
}

define i128 @test_cmpxchg_acq_rel_monotonic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_acq_rel_monotonic(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_monotonic_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_monotonic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_monotonic_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel monotonic
  ret i128 %new
}

define i128 @test_cmpxchg_acq_rel_acquire(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_acq_rel_acquire(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_acquire_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_acquire_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_acquire_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acq_rel.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel acquire
  ret i128 %new
}

define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_acq_rel_seq_cst(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new acq_rel seq_cst
  ret i128 %new
}

define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_seq_cst_monotonic(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst monotonic
  ret i128 %new
}

define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_seq_cst_acquire(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst acquire
  ret i128 %new
}

define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
; CHECK-LABEL: test_cmpxchg_seq_cst_seq_cst(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
; CHECK-NEXT: fence.sc.sys;
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd2, %rd3};
; CHECK-NEXT: mov.b128 swap, {%rd4, %rd5};
; CHECK-NEXT: atom.acquire.sys.cas.b128 dst, [%rd1], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd6, %rd7}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd4, %rd5};
; CHECK-NEXT: ret;
  %pairold = cmpxchg ptr %addr, i128 %cmp, i128 %new seq_cst seq_cst
  ret i128 %new
}

;; ---------------------------------------------------------------------------
;; atomicrmw and/or/xor/min/max/umin on i128, monotonic ordering.
;; The expected PTX for these is a compare-and-swap loop rather than a single
;; atom instruction: an initial ld.v2.b64 of the current value, the operation
;; computed on 64-bit registers, an atom.relaxed.sys.cas.b128, and then a
;; comparison of the value returned by the cas against the value that was
;; expected, branching back to %atomicrmw.start when they differ.
;; ---------------------------------------------------------------------------

;; Bitwise and: one and.b64 per 64-bit register of the pair.
define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_and(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_and_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_and_param_0];
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4;
; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB34_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: ret;
  %ret = atomicrmw and ptr %ptr, i128 %val monotonic
  ret i128 %ret
}

;; Bitwise or: one or.b64 per 64-bit register of the pair.
define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_or(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_or_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_or_param_0];
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4;
; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB35_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: ret;
  %ret = atomicrmw or ptr %ptr, i128 %val monotonic
  ret i128 %ret
}

;; Bitwise xor: one xor.b64 per 64-bit register of the pair.
define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_xor(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_xor_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_xor_param_0];
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4;
; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB36_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: ret;
  %ret = atomicrmw xor ptr %ptr, i128 %val monotonic
  ret i128 %ret
}

;; Signed min: the second register of each pair is compared with setp.lt.s64,
;; the first with setp.lt.u64, and the result selects both halves via selp.
define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_min(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<7>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_min_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_min_param_0];
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.lt.s64 %p4, %rd12, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB37_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: ret;
  %ret = atomicrmw min ptr %ptr, i128 %val monotonic
  ret i128 %ret
}

;; Signed max: same shape as the min test with setp.gt.s64 / setp.gt.u64.
define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_max(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<7>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_max_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_max_param_0];
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.gt.s64 %p4, %rd12, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB38_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: ret;
  %ret = atomicrmw max ptr %ptr, i128 %val monotonic
  ret i128 %ret
}

;; Unsigned min: both registers of each pair are compared with setp.lt.u64.
define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_umin(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<7>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umin_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_umin_param_0];
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.lt.u64 %p4, %rd12, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB39_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
; CHECK-NEXT: ret;
  %ret = atomicrmw umin ptr %ptr, i128 %val monotonic
  ret i128 %ret
}

define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
; CHECK-LABEL: test_atomicrmw_umax(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<7>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_atomicrmw_umax_param_1];
; CHECK-NEXT: ld.param.b64 %rd3, [test_atomicrmw_umax_param_0];
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
; CHECK-NEXT: setp.gt.u64 %p4, %rd12, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
; CHECK-NEXT: }
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
; CHECK-NEXT: mov.b64 %rd11, %rd1;
;
CHECK-NEXT: mov.b64 %rd12, %rd2; ; CHECK-NEXT: @%p6 bra $L__BB40_1; ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; ; CHECK-NEXT: ret; %ret = atomicrmw umax ptr %ptr, i128 %val monotonic ret i128 %ret } @si128 = internal addrspace(3) global i128 0, align 16 define void @test_atomicrmw_xchg_const() { ; CHECK-LABEL: test_atomicrmw_xchg_const( ; CHECK: { ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-NEXT: // demoted variable ; CHECK-NEXT: .shared .align 16 .b8 si128[16]; ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: mov.b64 %rd1, 0; ; CHECK-NEXT: mov.b64 %rd2, 23; ; CHECK-NEXT: { ; CHECK-NEXT: .reg .b128 amt, dst; ; CHECK-NEXT: mov.b128 amt, {%rd2, %rd1}; ; CHECK-NEXT: atom.relaxed.sys.shared.exch.b128 dst, [si128], amt; ; CHECK-NEXT: mov.b128 {%rd3, %rd4}, dst; ; CHECK-NEXT: } ; CHECK-NEXT: ret; %res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 monotonic ret void }