; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s

target triple = "nvptx64-nvidia-cuda"

;; Externally defined, zero-length byte array in address space 3. Both loads in
;; the test below are byte-offset accesses relative to this symbol, and the
;; expected PTX emits them as ld.shared.
@global_smem = external addrspace(3) global [0 x i8], align 16

;; Confirm the mov.b64 of global_smem is CSE'd. We need to make things a bit
;; complex with a loop to make this interesting.
;;
;; The address of @global_smem is used in two different blocks (entry and if2)
;; with a self-looping block (if1) in between. The expected output materializes
;; the symbol address exactly once, as %rd2 in the entry block, and the address
;; computation in if2 reuses %rd2 rather than emitting a second mov.b64.
;;
;; Arguments:
;;   %offset1 - byte offset of the first i32 load from @global_smem.
;;   %offset2 - byte offset of the second i32 load from @global_smem.
;;   %cond    - when false, the function skips straight to the return with the
;;              first loaded value; when true, control enters the if1 block.
;; Returns: the first loaded value, or the sum of both loaded values when the
;; if2 block is reached.
define i32 @test_mov_sym(i32 %offset1, i32 %offset2, i1 %cond) {
; CHECK-LABEL: test_mov_sym(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<4>;
; CHECK-NEXT:    .reg .b16 %rs<3>;
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-NEXT:    .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0: // %entry
; CHECK-NEXT:    ld.param.b8 %rs1, [test_mov_sym_param_2];
; CHECK-NEXT:    and.b16 %rs2, %rs1, 1;
; CHECK-NEXT:    setp.ne.b16 %p1, %rs2, 0;
; CHECK-NEXT:    ld.param.b32 %r1, [test_mov_sym_param_0];
; CHECK-NEXT:    cvt.s64.s32 %rd1, %r1;
; CHECK-NEXT:    mov.b64 %rd2, global_smem;
; CHECK-NEXT:    add.s64 %rd3, %rd2, %rd1;
; CHECK-NEXT:    ld.shared.b32 %r4, [%rd3];
; CHECK-NEXT:    not.pred %p2, %p1;
; CHECK-NEXT:    @%p2 bra $L__BB0_4;
; CHECK-NEXT:  // %bb.1: // %if1.preheader
; CHECK-NEXT:    ld.param.b32 %r2, [test_mov_sym_param_1];
; CHECK-NEXT:    setp.ne.b32 %p3, %r1, %r2;
; CHECK-NEXT:  $L__BB0_2: // %if1
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    @%p3 bra $L__BB0_2;
; CHECK-NEXT:  // %bb.3: // %if2
; CHECK-NEXT:    cvt.s64.s32 %rd4, %r2;
; CHECK-NEXT:    add.s64 %rd5, %rd2, %rd4;
; CHECK-NEXT:    ld.shared.b32 %r3, [%rd5];
; CHECK-NEXT:    add.s32 %r4, %r4, %r3;
; CHECK-NEXT:  $L__BB0_4: // %end
; CHECK-NEXT:    st.param.b32 [func_retval0], %r4;
; CHECK-NEXT:    ret;
entry:
  ;; First use of @global_smem: load an i32 at byte offset %offset1.
  %gep = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset1
  %val = load i32, ptr addrspace(3) %gep
  br i1 %cond, label %if1, label %end

if1:
  ;; Self-looping block: branches back to itself unless %offset1 == %offset2.
  ;; The comparison uses only function arguments; the block exists to put a
  ;; loop between the two uses of @global_smem.
  %cond2 = icmp eq i32 %offset1, %offset2
  br i1 %cond2, label %if2, label %if1

if2:
  ;; Second use of @global_smem, in a different block from the first: load an
  ;; i32 at byte offset %offset2 and add it to the value loaded in entry.
  %gep2 = getelementptr inbounds i8, ptr addrspace(3) @global_smem, i32 %offset2
  %val2 = load i32, ptr addrspace(3) %gep2
  %add = add i32 %val, %val2
  br label %end

end:
  ;; Merge point: %add when arriving from if2, %val when arriving from entry.
  %ret = phi i32 [ %add, %if2 ], [ %val, %entry ]
  ret i32 %ret
}